Skip to content

Commit b8049eb

Browse files
ienkovich aregm
authored and committed
REFACTOR-modin-project#2011: move default_to_pandas in groupby to backend (modin-project#2041)
Signed-off-by: ienkovich <ilya.enkovich@intel.com>
1 parent 6c8596d commit b8049eb

File tree

2 files changed

+17
-8
lines changed

2 files changed

+17
-8
lines changed

modin/data_management/functions/groupby_function.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import pandas
1515

1616
from .mapreducefunction import MapReduceFunction
17+
from modin.pandas.utils import try_cast_to_pandas
1718

1819

1920
class GroupbyReduceFunction(MapReduceFunction):
@@ -29,9 +30,13 @@ def caller(
2930
numeric_only=True,
3031
drop=False,
3132
):
32-
assert isinstance(
33-
by, type(query_compiler)
34-
), "Can only use groupby reduce with another Query Compiler"
33+
if not isinstance(by, type(query_compiler)):
34+
by = try_cast_to_pandas(by)
35+
return query_compiler.default_to_pandas(
36+
lambda df: map_func(
37+
df.groupby(by=by, axis=axis, **groupby_args), **map_args
38+
)
39+
)
3540
assert axis == 0, "Can only groupby reduce with axis=0"
3641

3742
if numeric_only:

modin/pandas/groupby.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -455,17 +455,21 @@ def all(self, **kwargs):
455455
)
456456

457457
def size(self):
458-
if is_list_like(self._by) and any(isinstance(o, Series) for o in self._by):
459-
# We don't have good way to handle this right now, fall back to Pandas.
460-
return self._default_to_pandas(lambda df: df.size())
461458
if self._axis == 0:
462459
# Size always works in as_index=True mode so it is necessary to make a
463460
# copy of _kwargs and change as_index in it
464461
kwargs = self._kwargs.copy()
465462
kwargs["as_index"] = True
463+
# Series objects in 'by' mean we couldn't handle the case and transform
464+
# 'by' to a query compiler. In this case we replace column names with
465+
# actual columns to be able to apply groupby to a Series.
466+
if is_list_like(self._by) and any(isinstance(o, Series) for o in self._by):
467+
by = [self._df[o] if isinstance(o, str) else o for o in self._by]
468+
else:
469+
by = self._by
466470
work_object = SeriesGroupBy(
467471
self._df[self._df.columns[0]],
468-
self._by,
472+
by,
469473
self._axis,
470474
drop=False,
471475
idx_name=None,
@@ -653,7 +657,7 @@ def _wrap_aggregation(
653657
DataFrame or Series
654658
Returns the same type as `self._df`.
655659
"""
656-
if not isinstance(self._by, type(self._query_compiler)) or self._axis != 0:
660+
if self._axis != 0:
657661
return self._default_to_pandas(default_func, **kwargs)
658662
# For aggregations, pandas behavior does this for the result.
659663
# For other operations it does not, so we wait until there is an aggregation to

0 commit comments

Comments
 (0)