Skip to content

Commit

Permalink
[DataFrame] Fixing bugs in groupby (ray-project#2031)
Browse files Browse the repository at this point in the history
  • Loading branch information
devin-petersohn authored and robertnishihara committed May 10, 2018
1 parent b79912e commit 89e2eef
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
3 changes: 3 additions & 0 deletions python/ray/dataframe/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,9 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
elif isinstance(by, compat.string_types):
by = self.__getitem__(by).values.tolist()
elif is_list_like(by):
if isinstance(by, pd.Series):
by = by.values.tolist()

mismatch = len(by) != len(self) if axis == 0 \
else len(by) != len(self.columns)

Expand Down
5 changes: 2 additions & 3 deletions python/ray/dataframe/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from __future__ import print_function

import pandas.core.groupby
- import numpy as np
import pandas as pd
from pandas.core.dtypes.common import is_list_like
import ray
Expand Down Expand Up @@ -34,7 +33,7 @@ def __init__(self, df, by, axis, level, as_index, sort, group_keys,
self._index_grouped = pd.Series(self._columns, index=self._index)\
.groupby(by=by, sort=sort)

- self._keys_and_values = [(k, np.array(v))
+ self._keys_and_values = [(k, v)
for k, v in self._index_grouped]

self._grouped_partitions = \
Expand All @@ -44,7 +43,7 @@ def __init__(self, df, by, axis, level, as_index, sort, group_keys,
as_index,
sort,
group_keys,
- squeeze) + part,
+ squeeze) + tuple(part.tolist()),
num_return_vals=len(self))
for part in partitions)))

Expand Down

0 comments on commit 89e2eef

Please sign in to comment.