Skip to content

Commit 668987a

Browse files
committed
FEAT-#1201: pivot implementation via unstack
Signed-off-by: Dmitry Chigarev <dmitry.chigarev@intel.com>
1 parent dd42a40 commit 668987a

File tree

6 files changed

+94
-16
lines changed

6 files changed

+94
-16
lines changed

docs/supported_apis/dataframe_supported.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,7 @@ default to pandas.
268268
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
269269
| ``pipe`` | `pipe`_ | Y | |
270270
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
271-
| ``pivot`` | `pivot`_ | D | |
271+
| ``pivot`` | `pivot`_ | Y | |
272272
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
273273
| ``pivot_table`` | `pivot_table`_ | D | |
274274
+----------------------------+---------------------------+------------------------+----------------------------------------------------+

modin/backends/base/query_compiler.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1198,6 +1198,10 @@ def groupby_agg(self, by, axis, agg_func, groupby_args, agg_args):
11981198
def unstack(self, level, fill_value):
11991199
pass
12001200

1201+
@abc.abstractmethod
1202+
def pivot(self, index, columns, values):
1203+
pass
1204+
12011205
@abc.abstractmethod
12021206
def get_dummies(self, columns, **kwargs):
12031207
"""Convert categorical variables to dummy variables for certain columns.

modin/backends/pandas/query_compiler.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2236,6 +2236,55 @@ def compute_groupby(df):
22362236

22372237
# END Manual Partitioning methods
22382238

2239+
def pivot(self, index, columns, values):
2240+
from pandas.core.reshape.pivot import _convert_by
2241+
2242+
def __convert_by(by):
2243+
if isinstance(by, pandas.Index):
2244+
by = list(by)
2245+
by = _convert_by(by)
2246+
if (
2247+
len(by) > 0
2248+
and (not is_list_like(by[0]) or isinstance(by[0], tuple))
2249+
and not all([key in self.columns for key in by])
2250+
):
2251+
by = [by]
2252+
return by
2253+
2254+
index, columns, values = map(__convert_by, [index, columns, values])
2255+
is_custom_index = (
2256+
len(index) == 1
2257+
and is_list_like(index[0])
2258+
and not isinstance(index[0], tuple)
2259+
)
2260+
2261+
if is_custom_index or len(index) == 0:
2262+
to_reindex = columns
2263+
else:
2264+
to_reindex = index + columns
2265+
2266+
if len(values) != 0:
2267+
obj = self.getitem_column_array(to_reindex + values)
2268+
else:
2269+
obj = self
2270+
2271+
if is_custom_index:
2272+
obj.index = index
2273+
2274+
reindexed = self.__constructor__(
2275+
obj._modin_frame._apply_full_axis(
2276+
1,
2277+
lambda df: df.set_index(to_reindex, append=(len(to_reindex) == 1)),
2278+
new_columns=obj.columns.drop(to_reindex),
2279+
)
2280+
)
2281+
2282+
unstacked = reindexed.unstack(level=columns, fill_value=None)
2283+
if len(reindexed.columns) == 1 and unstacked.columns.nlevels > 1:
2284+
unstacked.columns = unstacked.columns.droplevel(0)
2285+
2286+
return unstacked
2287+
22392288
# Get_dummies
22402289
def get_dummies(self, columns, **kwargs):
22412290
"""Convert categorical variables to dummy variables for certain columns.

modin/pandas/dataframe.py

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1712,8 +1712,27 @@ def unstack(self, level=-1, fill_value=None):
17121712
)
17131713

17141714
def pivot(self, index=None, columns=None, values=None):
1715-
return self._default_to_pandas(
1716-
pandas.DataFrame.pivot, index=index, columns=columns, values=values
1715+
"""
1716+
Return reshaped DataFrame organized by given index / column values.
1717+
Reshape data (produce a "pivot" table) based on column values. Uses
1718+
unique values from specified `index` / `columns` to form axes of the
1719+
resulting DataFrame.
1720+
Parameters
1721+
----------
1722+
index : str or object, optional
1723+
Column to use to make new frame's index. If None, uses
1724+
existing index.
1725+
columns : str or object
1726+
Column to use to make new frame's columns.
1727+
values : str, object or a list of the previous, optional
1728+
Column(s) to use for populating new frame's values. If not
1729+
specified, all remaining columns will be used and the result will
1730+
have hierarchically indexed columns.
1731+
"""
1732+
return self.__constructor__(
1733+
query_compiler=self._query_compiler.pivot(
1734+
index=index, columns=columns, values=values
1735+
)
17171736
)
17181737

17191738
def pivot_table(

modin/pandas/test/test_dataframe.py

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2494,17 +2494,23 @@ def test_pct_change(self):
24942494
with pytest.warns(UserWarning):
24952495
pd.DataFrame(data).pct_change()
24962496

2497-
def test_pivot(self):
2498-
df = pd.DataFrame(
2499-
{
2500-
"foo": ["one", "one", "one", "two", "two", "two"],
2501-
"bar": ["A", "B", "C", "A", "B", "C"],
2502-
"baz": [1, 2, 3, 4, 5, 6],
2503-
"zoo": ["x", "y", "z", "q", "w", "t"],
2504-
}
2497+
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
2498+
@pytest.mark.parametrize(
2499+
"index", [lambda df: df.columns[0], lambda df: df[df.columns[0]].values, None]
2500+
)
2501+
@pytest.mark.parametrize("columns", [lambda df: df.columns[len(df.columns) // 2]])
2502+
@pytest.mark.parametrize(
2503+
"values", [lambda df: df.columns[-1], lambda df: df.columns[-2:], None]
2504+
)
2505+
def test_pivot(self, data, index, columns, values):
2506+
eval_general(
2507+
*create_test_dfs(data),
2508+
lambda df, *args, **kwargs: df.pivot(*args, **kwargs),
2509+
index=index,
2510+
columns=columns,
2511+
values=values,
2512+
check_exception_type=None,
25052513
)
2506-
with pytest.warns(UserWarning):
2507-
df.pivot(index="foo", columns="bar", values="baz")
25082514

25092515
def test_pivot_table(self):
25102516
df = pd.DataFrame(

modin/pandas/test/test_general.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -225,9 +225,9 @@ def test_pivot():
225225
"zoo": ["x", "y", "z", "q", "w", "t"],
226226
}
227227
)
228-
with pytest.warns(UserWarning):
229-
df = pd.pivot(test_df, index="foo", columns="bar", values="baz")
230-
assert isinstance(df, pd.DataFrame)
228+
229+
df = pd.pivot(test_df, index="foo", columns="bar", values="baz")
230+
assert isinstance(df, pd.DataFrame)
231231

232232
with pytest.raises(ValueError):
233233
pd.pivot(test_df["bar"], index="foo", columns="bar", values="baz")

0 commit comments

Comments
 (0)