Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: support axis=None in DataFrame reductions #52042

Merged
merged 22 commits into from
May 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2f8666f
DEPR: support axis=None in DataFrame reductions
jbrockmendel Mar 17, 2023
5bfe28a
Merge branch 'main' into depr-reductions
jbrockmendel Mar 17, 2023
395b91d
Merge branch 'main' of https://github.com/pandas-dev/pandas into depr…
jbrockmendel Mar 17, 2023
aa592d6
Merge branch 'main' into depr-reductions
jbrockmendel Mar 20, 2023
71f70eb
Merge branch 'main' into depr-reductions
jbrockmendel Mar 22, 2023
5b8e078
test, whatsnew
jbrockmendel Mar 22, 2023
b07fc3b
Merge branch 'main' into depr-reductions
jbrockmendel Mar 23, 2023
b1a1b1c
Merge branch 'main' into depr-reductions
jbrockmendel Mar 24, 2023
6a99ba5
catch in apply(sum)
jbrockmendel Apr 29, 2023
8d8703a
Merge branch 'main' into depr-reductions
jbrockmendel Apr 29, 2023
09a46ac
Merge branch 'main' into depr-reductions
jbrockmendel May 5, 2023
cd6879e
Merge branch 'main' into depr-reductions
jbrockmendel May 15, 2023
c35b206
Merge branch 'main' into depr-reductions
jbrockmendel May 16, 2023
b1d4ab7
Fix defaults
jbrockmendel May 17, 2023
54f90e4
Merge branch 'main' into depr-reductions
jbrockmendel May 17, 2023
adafb33
catch warnings
jbrockmendel May 17, 2023
13bb07d
Merge branch 'main' into depr-reductions
jbrockmendel May 22, 2023
3b2f589
dont check stacklevel
jbrockmendel May 22, 2023
fe2b690
Merge branch 'main' into depr-reductions
jbrockmendel May 24, 2023
b335e65
mypy fixup
jbrockmendel May 24, 2023
05e991b
Merge branch 'main' into depr-reductions
jbrockmendel May 25, 2023
ecef601
catch warning
jbrockmendel May 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.15.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ API changes
current behavior:

.. ipython:: python
:okwarning:

gr.apply(sum)

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ Deprecations
- Deprecated :meth:`DataFrame.applymap`. Use the new :meth:`DataFrame.map` method instead (:issue:`52353`)
- Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)
- Deprecated behavior of :class:`DataFrame` reductions ``sum``, ``prod``, ``std``, ``var``, ``sem`` with ``axis=None``; in a future version this will operate over both axes, returning a scalar, instead of behaving like ``axis=0``. Note that this also affects numpy functions, e.g. ``np.sum(df)`` (:issue:`21597`)
- Deprecated behavior of :func:`concat` when :class:`DataFrame` has columns that are all-NA, in a future version these will not be discarded when determining the resulting dtype (:issue:`40893`)
- Deprecated behavior of :meth:`Series.dt.to_pydatetime`, in a future version this will return a :class:`Series` containing python ``datetime`` objects instead of an ``ndarray`` of datetimes; this matches the behavior of other :meth:`Series.dt` properties (:issue:`20306`)
- Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`)
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10998,7 +10998,7 @@ def max(
@doc(make_doc("sum", ndim=2))
def sum(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool = True,
numeric_only: bool = False,
min_count: int = 0,
Expand All @@ -11010,7 +11010,7 @@ def sum(
@doc(make_doc("prod", ndim=2))
def prod(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool = True,
numeric_only: bool = False,
min_count: int = 0,
Expand Down Expand Up @@ -11041,7 +11041,7 @@ def median(
@doc(make_doc("sem", ndim=2))
def sem(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool = True,
ddof: int = 1,
numeric_only: bool = False,
Expand All @@ -11052,7 +11052,7 @@ def sem(
@doc(make_doc("var", ndim=2))
def var(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool = True,
ddof: int = 1,
numeric_only: bool = False,
Expand All @@ -11063,7 +11063,7 @@ def var(
@doc(make_doc("std", ndim=2))
def std(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool = True,
ddof: int = 1,
numeric_only: bool = False,
Expand Down
39 changes: 32 additions & 7 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11290,6 +11290,8 @@ def _logical_func(
name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
)
return res._logical_func(name, func, skipna=skipna, **kwargs)
elif axis is None:
axis = 0

if (
self.ndim > 1
Expand Down Expand Up @@ -11394,15 +11396,27 @@ def _stat_function_ddof(
self,
name: str,
func,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
**kwargs,
) -> Series | float:
nv.validate_stat_ddof_func((), kwargs, fname=name)
validate_bool_kwarg(skipna, "skipna", none_allowed=False)

if axis is None:
if self.ndim > 1:
warnings.warn(
f"The behavior of {type(self).__name__}.{name} with axis=None "
"is deprecated, in a future version this will reduce over both "
"axes and return a scalar. To retain the old behavior, pass "
"axis=0 (or do not pass axis)",
FutureWarning,
stacklevel=find_stack_level(),
)
axis = 0
elif axis is lib.no_default:
axis = 0

return self._reduce(
Expand All @@ -11411,7 +11425,7 @@ def _stat_function_ddof(

def sem(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand All @@ -11423,7 +11437,7 @@ def sem(

def var(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand All @@ -11435,7 +11449,7 @@ def var(

def std(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand Down Expand Up @@ -11547,7 +11561,7 @@ def _min_count_stat_function(
self,
name: str,
func,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand All @@ -11559,6 +11573,17 @@ def _min_count_stat_function(
validate_bool_kwarg(skipna, "skipna", none_allowed=False)

if axis is None:
if self.ndim > 1:
warnings.warn(
f"The behavior of {type(self).__name__}.{name} with axis=None "
"is deprecated, in a future version this will reduce over both "
"axes and return a scalar. To retain the old behavior, pass "
"axis=0 (or do not pass axis)",
FutureWarning,
stacklevel=find_stack_level(),
)
axis = 0
elif axis is lib.no_default:
axis = 0

return self._reduce(
Expand All @@ -11572,7 +11597,7 @@ def _min_count_stat_function(

def sum(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand All @@ -11584,7 +11609,7 @@ def sum(

def prod(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/frame/test_npfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,23 @@ def test_np_sqrt(self, float_frame):

tm.assert_frame_equal(result, float_frame.apply(np.sqrt))

def test_sum_deprecated_axis_behavior(self):
    """Check that ``np.sum(df)`` and ``df.sum(axis=None)`` both warn and agree.

    GH#52042: the behavior of ``df.sum(axis=None)`` is deprecated, and that
    is the call made when ``np.sum(df)`` is used.
    """
    frame = DataFrame(np.random.randn(4, 3))
    msg = "The behavior of DataFrame.sum with axis=None is deprecated"

    # The call routed through numpy is checked for the warning only,
    # not for the stacklevel it is attributed to.
    with tm.assert_produces_warning(
        FutureWarning, match=msg, check_stacklevel=False
    ):
        res = np.sum(frame)

    # The direct call must emit the same warning.
    with tm.assert_produces_warning(FutureWarning, match=msg):
        expected = frame.sum(axis=None)

    tm.assert_series_equal(res, expected)

def test_np_ravel(self):
# GH26247
arr = np.array(
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,11 @@ def test_agg_apply_corner(ts, tsframe):
)
tm.assert_frame_equal(grouped.sum(), exp_df)
tm.assert_frame_equal(grouped.agg(np.sum), exp_df)
tm.assert_frame_equal(grouped.apply(np.sum), exp_df)

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
res = grouped.apply(np.sum)
tm.assert_frame_equal(res, exp_df)


def test_agg_grouping_is_list_tuple(ts):
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1072,14 +1072,17 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):

# Check output when no other methods are called before .apply()
grp = df.groupby(by="a")
result = grp.apply(sum)
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
result = grp.apply(sum)
tm.assert_frame_equal(result, expected)

# Check output when another method is called before .apply()
grp = df.groupby(by="a")
args = get_groupby_method_args(reduction_func, df)
_ = getattr(grp, reduction_func)(*args)
result = grp.apply(sum)
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
result = grp.apply(sum)
tm.assert_frame_equal(result, expected)


Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ def test_builtins_apply(keys, f):
gb = df.groupby(keys)

fname = f.__name__
result = gb.apply(f)

warn = None if f is not sum else FutureWarning
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
result = gb.apply(f)
ngroups = len(df.drop_duplicates(subset=keys))

assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))"
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,13 @@ def test_groupby_as_index_agg(df):

gr = df.groupby(ts)
gr.nth(0) # invokes set_selection_from_grouper internally
tm.assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum))

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
res = gr.apply(sum)
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
alt = df.groupby(ts).apply(sum)
tm.assert_frame_equal(res, alt)

for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]:
gr = df.groupby(ts, as_index=False)
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,12 @@ def test_expanding_func(func, static_comp, frame_or_series):
result = getattr(obj, func)()
assert isinstance(result, frame_or_series)

expected = static_comp(data[:11])
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
warn = None
if frame_or_series is DataFrame and static_comp is np.sum:
warn = FutureWarning
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
expected = static_comp(data[:11])
if frame_or_series is Series:
tm.assert_almost_equal(result[10], expected)
else:
Expand Down