Skip to content

BUG: DataFrame.count not returning subclassed data types. #31139

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ Reshaping
^^^^^^^^^

-
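- Bug in :meth:`DataFrame.count` where the result was not returned as the subclassed :class:`Series` or :class:`DataFrame` type when called on a subclassed :class:`DataFrame` (:issue:`31139`)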
- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns are set (:issue:`17038`)
- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
- Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7811,7 +7811,7 @@ def count(self, axis=0, level=None, numeric_only=False):

# GH #423
if len(frame._get_axis(axis)) == 0:
result = Series(0, index=frame._get_agg_axis(axis))
result = frame._constructor_sliced(0, index=frame._get_agg_axis(axis))
else:
if frame._is_mixed_type or frame._data.any_extension_types:
# the or any_extension_types is really only hit for single-
Expand All @@ -7821,7 +7821,9 @@ def count(self, axis=0, level=None, numeric_only=False):
# GH13407
series_counts = notna(frame).sum(axis=axis)
counts = series_counts.values
result = Series(counts, index=frame._get_agg_axis(axis))
result = frame._constructor_sliced(
counts, index=frame._get_agg_axis(axis)
)

return result.astype("int64")

Expand Down Expand Up @@ -7860,7 +7862,7 @@ def _count_level(self, level, axis=0, numeric_only=False):
level_codes = ensure_int64(count_axis.codes[level])
counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=0)

result = DataFrame(counts, index=level_index, columns=agg_axis)
result = frame._constructor(counts, index=level_index, columns=agg_axis)

if axis == 1:
# Undo our earlier transpose
Expand Down
33 changes: 33 additions & 0 deletions pandas/tests/frame/test_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,3 +557,36 @@ def strech(row):
result = df.apply(lambda x: [1, 2, 3], axis=1)
assert not isinstance(result, tm.SubclassedDataFrame)
tm.assert_series_equal(result, expected)

def test_subclassed_count(self):
    """Check that ``count`` on a subclassed frame returns subclassed results.

    Without ``level`` the result must be a ``tm.SubclassedSeries``; with
    ``level`` it must be a ``tm.SubclassedDataFrame``.
    """
    # GH 31139

    # Columns of strings, floats (with one NaN) and booleans.
    mixed_frame = tm.SubclassedDataFrame(
        {
            "Person": ["John", "Myla", "Lewis", "John", "Myla"],
            "Age": [24.0, np.nan, 21.0, 33, 26],
            "Single": [False, True, True, True, False],
        }
    )
    assert isinstance(mixed_frame.count(), tm.SubclassedSeries)

    # All-integer columns.
    int_frame = tm.SubclassedDataFrame(
        {"A": [1, 0, 3], "B": [0, 5, 6], "C": [7, 8, 0]}
    )
    assert isinstance(int_frame.count(), tm.SubclassedSeries)

    # MultiIndex on both axes; counting per level yields a frame.
    row_index = MultiIndex.from_tuples(
        list(zip("AABB", "cdcd")), names=["aaa", "ccc"]
    )
    column_index = MultiIndex.from_tuples(
        list(zip("WWXX", "yzyz")), names=["www", "yyy"]
    )
    multi_frame = tm.SubclassedDataFrame(
        [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]],
        index=row_index,
        columns=column_index,
    )
    assert isinstance(multi_frame.count(level=1), tm.SubclassedDataFrame)

    # Empty frame.
    empty_frame = tm.SubclassedDataFrame()
    assert isinstance(empty_frame.count(), tm.SubclassedSeries)