Skip to content

Fix 'observed' kwarg not doing anything on SeriesGroupBy #26463

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
24 commits merged on May 30, 2019
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a5d6d1a
Fix 'observed' kwarg not doing anything on SeriesGroupBy
krsnik93 May 19, 2019
41f49f4
Merge branch 'GH24880'
krsnik93 May 19, 2019
2575c41
Wrap long lines
krsnik93 May 19, 2019
1c02d9f
Move tests to test_categorical.py
krsnik93 May 19, 2019
7350472
Merge remote-tracking branch 'upstream/master'
krsnik93 May 20, 2019
0a949d5
Merge branch 'master' into GH24880
krsnik93 May 20, 2019
0e9f473
Parameterized tests for 'observed' kwarg on SeriesGroupBy
krsnik93 May 20, 2019
1ef54f4
Merge remote-tracking branch 'upstream/master' into GH24880
krsnik93 May 20, 2019
cd481ad
Split test_groupby_series_observed to utilize fixtures better;Sort im…
krsnik93 May 20, 2019
a515caf
Sort imports in core/groupby/groupby.py
krsnik93 May 20, 2019
ff42dd7
Remove too specific fixtures and adjust tests
krsnik93 May 20, 2019
c22875c
Merge remote-tracking branch 'upstream/master' into GH24880
krsnik93 May 21, 2019
cc0b725
Use literal values for indices in tests
krsnik93 May 21, 2019
629a144
Merge remote-tracking branch 'upstream/master' into GH24880
krsnik93 May 22, 2019
e4fda22
Use MultiIndex.from_* to construct indices in tests
krsnik93 May 22, 2019
8cfa4a1
Wrap long lines
krsnik93 May 22, 2019
db176de
Merge remote-tracking branch 'upstream/master' into GH24880
krsnik93 May 26, 2019
d520952
Enhance docstring for _reindex_output
krsnik93 May 26, 2019
3591dbc
Modify tests to reuse existing fixture
krsnik93 May 27, 2019
f97c8a1
Merge remote-tracking branch 'upstream/master' into GH24880
krsnik93 May 27, 2019
d5c9c40
Refactor tests from a class to stand-alone functions
krsnik93 May 27, 2019
ad16db8
Simplify a test, add a docstring for the fixture and drop pd.* prefix…
krsnik93 May 28, 2019
7c525a1
Merge remote-tracking branch 'upstream/master' into GH24880
krsnik93 May 28, 2019
e6bca5e
Merge remote-tracking branch 'upstream/master' into GH24880
krsnik93 May 29, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Use MultiIndex.from_* to construct indices in tests
  • Loading branch information
krsnik93 committed May 22, 2019
commit e4fda22837922e900947af3e7ffb1a2e195fb5f9
78 changes: 28 additions & 50 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,23 +966,14 @@ def test_shift(fill_value):
assert_equal(res, expected)


@pytest.mark.parametrize('operation', ['agg', 'apply'])
def test_groupby_series_observed_true(df_cat, operation):
@pytest.mark.parametrize('operation, index', [
('agg', MultiIndex.from_frame(pd.DataFrame({'a': ['one', 'one', 'two'],
'b': ['foo', 'bar', 'foo']},
dtype='category'))),
('apply', MultiIndex.from_frame(pd.DataFrame({'a': ['one', 'one', 'two'],
'b': ['foo', 'bar', 'foo']})))])
def test_groupby_series_observed_true(df_cat, operation, index):
# GH 24880
index = {
'agg': MultiIndex(levels=[CategoricalIndex(['one', 'two'],
categories=['one', 'two'],
ordered=False),
CategoricalIndex(['bar', 'foo'],
categories=['bar', 'foo'],
ordered=False)],
codes=[[0, 0, 1], [1, 0, 1]],
names=['a', 'b']),
'apply': MultiIndex(levels=[['one', 'two'], ['bar', 'foo']],
codes=[[0, 0, 1], [1, 0, 1]],
names=['a', 'b'])
}[operation]

expected = pd.Series(data=[3, 3, 4], index=index, name='c')
grouped = df_cat.groupby(['a', 'b'], observed=True)['c']
result = getattr(grouped, operation)(sum)
Expand All @@ -993,48 +984,35 @@ def test_groupby_series_observed_true(df_cat, operation):
@pytest.mark.parametrize('observed', [False, None])
def test_groupby_series_observed_false_or_none(df_cat, observed, operation):
# GH 24880
index, _ = MultiIndex(levels=[CategoricalIndex(['one', 'two'],
categories=['one', 'two'],
ordered=False),
CategoricalIndex(['bar', 'foo'],
categories=['bar', 'foo'],
ordered=False)],
codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['a', 'b']).sortlevel()
index, _ = MultiIndex.from_product(
[CategoricalIndex(['one', 'two'], categories=['one', 'two'], ordered=False),
CategoricalIndex(['bar', 'foo'], categories=['bar', 'foo'], ordered=False)],
names=['a', 'b']).sortlevel()

expected = pd.Series(data=[3, 3, np.nan, 4], index=index, name='c')
grouped = df_cat.groupby(['a', 'b'], observed=observed)['c']
result = getattr(grouped, operation)(sum)
assert_series_equal(result, expected)


@pytest.mark.parametrize("observed, data", [
(True, [1, 2, 3, 3, 4, 4]),
(False, [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0]),
(None, [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0])])
def test_groupby_series_observed_apply_dict(df_cat, observed, data):
@pytest.mark.parametrize("observed, index, data", [
(True, MultiIndex.from_tuples(
[('one', 'foo', 'min'), ('one', 'foo', 'max'),
('one', 'bar', 'min'), ('one', 'bar', 'max'),
('two', 'foo', 'min'), ('two', 'foo', 'max')],
names=['a', 'b', None]), [1, 2, 3, 3, 4, 4]),
(False, MultiIndex.from_product(
[CategoricalIndex(['one', 'two'], categories=['one', 'two'], ordered=False),
CategoricalIndex(['bar', 'foo'], categories=['bar', 'foo'], ordered=False),
Index(['min', 'max'])],
names=['a', 'b', None]), [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0]),
(None, MultiIndex.from_product(
[CategoricalIndex(['one', 'two'], categories=['one', 'two'], ordered=False),
CategoricalIndex(['bar', 'foo'], categories=['bar', 'foo'], ordered=False),
Index(['min', 'max'])],
names=['a', 'b', None]), [3, 3, 1, 2, np.nan, np.nan, 4.0, 4.0])])
def test_groupby_series_observed_apply_dict(df_cat, observed, index, data):
# GH 24880
index = {
True: MultiIndex(levels=[['one', 'two'],
['bar', 'foo'],
['max', 'min']],
codes=[[0, 0, 0, 0, 1, 1],
[1, 1, 0, 0, 1, 1],
[1, 0, 1, 0, 1, 0]],
names=['a', 'b', None]),
False: MultiIndex(levels=[CategoricalIndex(['one', 'two'],
categories=['one', 'two'],
ordered=False),
CategoricalIndex(['bar', 'foo'],
categories=['bar', 'foo'],
ordered=False),
Index(['max', 'min'])],
codes=[[0, 0, 0, 0, 1, 1, 1, 1],
[0, 0, 1, 1, 0, 0, 1, 1],
[1, 0, 1, 0, 1, 0, 1, 0]],
names=['a', 'b', None])
}[bool(observed)]

expected = pd.Series(data=data, index=index, name='c')
result = df_cat.groupby(['a', 'b'], observed=observed)['c'].apply(
lambda x: OrderedDict([('min', x.min()), ('max', x.max())]))
Expand Down