Skip to content

Commit

Permalink
BUG: GH38672 SeriesGroupBy.value_counts for categorical (pandas-dev#3…
Browse files Browse the repository at this point in the history
  • Loading branch information
venaturum authored and luckyvs1 committed Jan 20, 2021
1 parent eda197b commit cc55bca
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 7 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ Plotting
Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^

-
- Bug in :meth:`SeriesGroupBy.value_counts` where unobserved categories in a grouped categorical series were not tallied (:issue:`38672`)
-

Reshaping
Expand Down
18 changes: 13 additions & 5 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
ensure_int64,
ensure_platform_int,
is_bool,
is_categorical_dtype,
is_integer_dtype,
is_interval_dtype,
is_numeric_dtype,
Expand Down Expand Up @@ -681,9 +682,10 @@ def value_counts(
from pandas.core.reshape.merge import get_join_indexers
from pandas.core.reshape.tile import cut

if bins is not None and not np.iterable(bins):
# scalar bins cannot be done at top level
# in a backward compatible way
ids, _, _ = self.grouper.group_info
val = self.obj._values

def apply_series_value_counts():
return self.apply(
Series.value_counts,
normalize=normalize,
Expand All @@ -692,8 +694,14 @@ def value_counts(
bins=bins,
)

ids, _, _ = self.grouper.group_info
val = self.obj._values
if bins is not None:
if not np.iterable(bins):
# scalar bins cannot be done at top level
# in a backward compatible way
return apply_series_value_counts()
elif is_categorical_dtype(val):
# GH38672
return apply_series_value_counts()

# groupby removes null keys from groupings
mask = ids != -1
Expand Down
38 changes: 37 additions & 1 deletion pandas/tests/groupby/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,16 @@
import numpy as np
import pytest

from pandas import DataFrame, Grouper, MultiIndex, Series, date_range, to_datetime
from pandas import (
Categorical,
CategoricalIndex,
DataFrame,
Grouper,
MultiIndex,
Series,
date_range,
to_datetime,
)
import pandas._testing as tm


Expand Down Expand Up @@ -111,3 +120,30 @@ def test_series_groupby_value_counts_with_grouper():
expected.index.names = result.index.names

tm.assert_series_equal(result, expected)


def test_series_groupby_value_counts_on_categorical():
    """Regression test for GH38672.

    Calls ``SeriesGroupBy.value_counts`` on a categorical Series that
    declares a category which never occurs in the data, and checks that
    the unobserved category is still present in the result with a count
    of zero.
    """
    # GH38672

    # One row holding "a"; category "b" is declared but never observed.
    s = Series(Categorical(["a"], categories=["a", "b"]))
    # The list [0] is the grouping key, so the single row falls in group 0.
    result = s.groupby([0]).value_counts()

    # Two-level index: outer level is the group key (0 for both rows),
    # inner level is a CategoricalIndex listing every declared category.
    expected = Series(
        data=[1, 0],
        index=MultiIndex.from_arrays(
            [
                [0, 0],
                CategoricalIndex(
                    ["a", "b"], categories=["a", "b"], ordered=False, dtype="category"
                ),
            ]
        ),
        name=0,
    )

    # Expected:
    # 0  a    1
    #    b    0
    # Name: 0, dtype: int64

    # Strict comparison of the result against the hand-built expectation.
    tm.assert_series_equal(result, expected)

0 comments on commit cc55bca

Please sign in to comment.