Skip to content

Commit

Permalink
DEPR: support axis=None in DataFrame reductions (pandas-dev#52042)
Browse files Browse the repository at this point in the history
* DEPR: support axis=None in DataFrame reductions

* test, whatsnew

* catch in apply(sum)

* Fix defaults

* catch warnings

* don't check stacklevel

* mypy fixup

* catch warning
  • Loading branch information
jbrockmendel authored and topper-123 committed May 27, 2023
1 parent f97b19a commit 448e5c2
Show file tree
Hide file tree
Showing 10 changed files with 84 additions and 18 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.15.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ API changes
current behavior:

.. ipython:: python
:okwarning:
gr.apply(sum)
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ Deprecations
- Deprecated :meth:`DataFrame.applymap`. Use the new :meth:`DataFrame.map` method instead (:issue:`52353`)
- Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)
- Deprecated behavior of :class:`DataFrame` reductions ``sum``, ``prod``, ``std``, ``var``, ``sem`` with ``axis=None``; in a future version this will operate over both axes and return a scalar instead of behaving like ``axis=0``. Note that this also affects numpy functions, e.g. ``np.sum(df)`` (:issue:`21597`)
- Deprecated behavior of :func:`concat` when :class:`DataFrame` has columns that are all-NA, in a future version these will not be discarded when determining the resulting dtype (:issue:`40893`)
- Deprecated behavior of :meth:`Series.dt.to_pydatetime`, in a future version this will return a :class:`Series` containing python ``datetime`` objects instead of an ``ndarray`` of datetimes; this matches the behavior of other :meth:`Series.dt` properties (:issue:`20306`)
- Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`)
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10981,7 +10981,7 @@ def max(
@doc(make_doc("sum", ndim=2))
def sum(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool = True,
numeric_only: bool = False,
min_count: int = 0,
Expand All @@ -10993,7 +10993,7 @@ def sum(
@doc(make_doc("prod", ndim=2))
def prod(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool = True,
numeric_only: bool = False,
min_count: int = 0,
Expand Down Expand Up @@ -11024,7 +11024,7 @@ def median(
@doc(make_doc("sem", ndim=2))
def sem(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool = True,
ddof: int = 1,
numeric_only: bool = False,
Expand All @@ -11035,7 +11035,7 @@ def sem(
@doc(make_doc("var", ndim=2))
def var(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool = True,
ddof: int = 1,
numeric_only: bool = False,
Expand All @@ -11046,7 +11046,7 @@ def var(
@doc(make_doc("std", ndim=2))
def std(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool = True,
ddof: int = 1,
numeric_only: bool = False,
Expand Down
39 changes: 32 additions & 7 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11290,6 +11290,8 @@ def _logical_func(
name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
)
return res._logical_func(name, func, skipna=skipna, **kwargs)
elif axis is None:
axis = 0

if (
self.ndim > 1
Expand Down Expand Up @@ -11394,15 +11396,27 @@ def _stat_function_ddof(
self,
name: str,
func,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
**kwargs,
) -> Series | float:
nv.validate_stat_ddof_func((), kwargs, fname=name)
validate_bool_kwarg(skipna, "skipna", none_allowed=False)

if axis is None:
if self.ndim > 1:
warnings.warn(
f"The behavior of {type(self).__name__}.{name} with axis=None "
"is deprecated, in a future version this will reduce over both "
"axes and return a scalar. To retain the old behavior, pass "
"axis=0 (or do not pass axis)",
FutureWarning,
stacklevel=find_stack_level(),
)
axis = 0
elif axis is lib.no_default:
axis = 0

return self._reduce(
Expand All @@ -11411,7 +11425,7 @@ def _stat_function_ddof(

def sem(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand All @@ -11423,7 +11437,7 @@ def sem(

def var(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand All @@ -11435,7 +11449,7 @@ def var(

def std(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand Down Expand Up @@ -11547,7 +11561,7 @@ def _min_count_stat_function(
self,
name: str,
func,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand All @@ -11559,6 +11573,17 @@ def _min_count_stat_function(
validate_bool_kwarg(skipna, "skipna", none_allowed=False)

if axis is None:
if self.ndim > 1:
warnings.warn(
f"The behavior of {type(self).__name__}.{name} with axis=None "
"is deprecated, in a future version this will reduce over both "
"axes and return a scalar. To retain the old behavior, pass "
"axis=0 (or do not pass axis)",
FutureWarning,
stacklevel=find_stack_level(),
)
axis = 0
elif axis is lib.no_default:
axis = 0

return self._reduce(
Expand All @@ -11572,7 +11597,7 @@ def _min_count_stat_function(

def sum(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand All @@ -11584,7 +11609,7 @@ def sum(

def prod(
self,
axis: Axis | None = None,
axis: Axis | None = 0,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/frame/test_npfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,23 @@ def test_np_sqrt(self, float_frame):

tm.assert_frame_equal(result, float_frame.apply(np.sqrt))

def test_sum_deprecated_axis_behavior(self):
# GH#52042 deprecated behavior of df.sum(axis=None), which gets
# called when we do np.sum(df)

arr = np.random.randn(4, 3)
df = DataFrame(arr)

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(
FutureWarning, match=msg, check_stacklevel=False
):
res = np.sum(df)

with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df.sum(axis=None)
tm.assert_series_equal(res, expected)

def test_np_ravel(self):
# GH26247
arr = np.array(
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,11 @@ def test_agg_apply_corner(ts, tsframe):
)
tm.assert_frame_equal(grouped.sum(), exp_df)
tm.assert_frame_equal(grouped.agg(np.sum), exp_df)
tm.assert_frame_equal(grouped.apply(np.sum), exp_df)

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
res = grouped.apply(np.sum)
tm.assert_frame_equal(res, exp_df)


def test_agg_grouping_is_list_tuple(ts):
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1071,14 +1071,17 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):

# Check output when no other methods are called before .apply()
grp = df.groupby(by="a")
result = grp.apply(sum)
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
result = grp.apply(sum)
tm.assert_frame_equal(result, expected)

# Check output when another method is called before .apply()
grp = df.groupby(by="a")
args = get_groupby_method_args(reduction_func, df)
_ = getattr(grp, reduction_func)(*args)
result = grp.apply(sum)
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
result = grp.apply(sum)
tm.assert_frame_equal(result, expected)


Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ def test_builtins_apply(keys, f):
gb = df.groupby(keys)

fname = f.__name__
result = gb.apply(f)

warn = None if f is not sum else FutureWarning
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
result = gb.apply(f)
ngroups = len(df.drop_duplicates(subset=keys))

assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))"
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,13 @@ def test_groupby_as_index_agg(df):

gr = df.groupby(ts)
gr.nth(0) # invokes set_selection_from_grouper internally
tm.assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum))

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
res = gr.apply(sum)
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
alt = df.groupby(ts).apply(sum)
tm.assert_frame_equal(res, alt)

for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]:
gr = df.groupby(ts, as_index=False)
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,12 @@ def test_expanding_func(func, static_comp, frame_or_series):
result = getattr(obj, func)()
assert isinstance(result, frame_or_series)

expected = static_comp(data[:11])
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
warn = None
if frame_or_series is DataFrame and static_comp is np.sum:
warn = FutureWarning
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
expected = static_comp(data[:11])
if frame_or_series is Series:
tm.assert_almost_equal(result[10], expected)
else:
Expand Down

0 comments on commit 448e5c2

Please sign in to comment.