Skip to content

Commit

Permalink
tune implementation for square-like frames
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
  • Loading branch information
dchigarev committed Aug 9, 2023
1 parent 0b49608 commit e13e4ee
Showing 1 changed file with 12 additions and 3 deletions.
15 changes: 12 additions & 3 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from pandas.core.groupby.base import transformation_kernels

from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
from modin.config import ExperimentalGroupbyImpl
from modin.config import ExperimentalGroupbyImpl, CpuCount
from modin.error_message import ErrorMessage
from modin.utils import (
try_cast_to_pandas,
Expand Down Expand Up @@ -2828,10 +2828,19 @@ def setitem_builder(df, internal_indices=[]): # pragma: no cover
# Drop/Dropna
# This will change the shape of the resulting data.
def dropna(self, **kwargs):
if kwargs.get("axis", 0) == 1 and kwargs.get("thresh", no_default) in (
is_column_wise = kwargs.get("axis", 0) == 1
no_thresh_passed = kwargs.get("thresh", no_default) in (
no_default,
None,
):
)
# FIXME: this is a naive workaround for this problem: https://github.com/modin-project/modin/issues/5394
# If there are too many partitions, then all non-full-axis implementations start to perform very badly.
# The threshold here is fairly arbitrary, though it works fine in simple scenarios.
processable_amount_of_partitions = (
np.prod(self._modin_frame._partitions.shape) < CpuCount.get() * 32
)

if is_column_wise and no_thresh_passed and processable_amount_of_partitions:
how = kwargs.get("how", "any")
subset = kwargs.get("subset")
how = "any" if how in (no_default, None) else how
Expand Down

0 comments on commit e13e4ee

Please sign in to comment.