From ce123cd0024029bb9138acdcc14382628a9310fc Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Mon, 16 Jan 2023 13:31:55 -0500 Subject: [PATCH] REGR: NumPy func warning when dropping nuisance in agg, apply, transform (#50627) REGR: Warnings for NumPy funcs when dropping nuisance in agg, apply, transform --- doc/source/whatsnew/v1.5.3.rst | 1 + pandas/core/apply.py | 24 ++++++++++-- pandas/core/groupby/groupby.py | 38 ++++++++++++++++--- pandas/tests/apply/test_frame_apply.py | 21 ++++++++++ .../tests/groupby/aggregate/test_aggregate.py | 12 ++++++ pandas/tests/groupby/test_apply.py | 13 +++++++ pandas/tests/groupby/test_groupby.py | 18 ++++++++- .../tests/groupby/transform/test_transform.py | 15 ++++++++ pandas/tests/resample/test_resample_api.py | 21 ++++++++++ pandas/tests/reshape/test_pivot.py | 13 +++++-- 10 files changed, 163 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst index 33d5bb4df51b8..489a6fda9ffab 100644 --- a/doc/source/whatsnew/v1.5.3.rst +++ b/doc/source/whatsnew/v1.5.3.rst @@ -19,6 +19,7 @@ Fixed regressions - Enforced reversion of ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`) - Fixed regression in :meth:`SeriesGroupBy.apply` setting a ``name`` attribute on the result if the result was a :class:`DataFrame` (:issue:`49907`) - Fixed performance regression in setting with the :meth:`~DataFrame.at` indexer (:issue:`49771`) +- Fixed regression in the methods ``apply``, ``agg``, and ``transform`` when used with NumPy functions that informed users to supply ``numeric_only=True`` if the operation failed on non-numeric dtypes; such columns must be dropped prior to using these methods (:issue:`50538`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 4c535abe585d4..4987a18ae2027 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -39,7 +39,10 @@ SpecificationError, ) from pandas.util._decorators import cache_readonly -from pandas.util._exceptions import find_stack_level +from pandas.util._exceptions import ( + find_stack_level, + rewrite_warning, +) from pandas.core.dtypes.cast import is_nested_object from pandas.core.dtypes.common import ( @@ -174,7 +177,15 @@ def agg(self) -> DataFrame | Series | None: if callable(arg): f = com.get_cython_func(arg) if f and not args and not kwargs: - return getattr(obj, f)() + # GH#50538 + old_msg = "The default value of numeric_only" + new_msg = ( + f"The operation {arg} failed on a column. If any error is " + f"raised, this will raise an exception in a future version " + f"of pandas. Drop these columns to avoid this warning." + ) + with rewrite_warning(old_msg, FutureWarning, new_msg): + return getattr(obj, f)() # caller can react return None @@ -309,7 +320,14 @@ def transform_str_or_callable(self, func) -> DataFrame | Series: if not args and not kwargs: f = com.get_cython_func(func) if f: - return getattr(obj, f)() + old_msg = "The default value of numeric_only" + new_msg = ( + f"The operation {func} failed on a column. If any error is " + f"raised, this will raise an exception in a future version " + f"of pandas. Drop these columns to avoid this warning." + ) + with rewrite_warning(old_msg, FutureWarning, new_msg): + return getattr(obj, f)() # Two possible ways to use a UDF - apply or call directly try: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f3e7d9806ad1d..1c3a95b305087 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -8,7 +8,10 @@ class providing the base-class of operations. """ from __future__ import annotations -from contextlib import contextmanager +from contextlib import ( + contextmanager, + nullcontext, +) import datetime from functools import ( partial, @@ -64,7 +67,10 @@ class providing the base-class of operations. cache_readonly, doc, ) -from pandas.util._exceptions import find_stack_level +from pandas.util._exceptions import ( + find_stack_level, + rewrite_warning, +) from pandas.core.dtypes.cast import ensure_dtype_can_hold_na from pandas.core.dtypes.common import ( @@ -1508,7 +1514,9 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs) ) ) def apply(self, func, *args, **kwargs) -> NDFrameT: - + # GH#50538 + is_np_func = func in com._cython_table and func not in com._builtin_table + orig_func = func func = com.is_builtin_func(func) if isinstance(func, str): @@ -1546,7 +1554,17 @@ def f(g): # ignore SettingWithCopy here in case the user mutates with option_context("mode.chained_assignment", None): try: - result = self._python_apply_general(f, self._selected_obj) + # GH#50538 + old_msg = "The default value of numeric_only" + new_msg = ( + f"The operation {orig_func} failed on a column. If any error is " + f"raised, this will raise an exception in a future version " + f"of pandas. Drop these columns to avoid this warning." + ) + with rewrite_warning( + old_msg, FutureWarning, new_msg + ) if is_np_func else nullcontext(): + result = self._python_apply_general(f, self._selected_obj) except TypeError: # gh-20949 # try again, with .apply acting as a filtering @@ -1557,7 +1575,17 @@ def f(g): # on a string grouper column with self._group_selection_context(): - return self._python_apply_general(f, self._selected_obj) + # GH#50538 + old_msg = "The default value of numeric_only" + new_msg = ( + f"The operation {orig_func} failed on a column. If any error " + f"is raised, this will raise an exception in a future version " + f"of pandas. Drop these columns to avoid this warning." + ) + with rewrite_warning( + old_msg, FutureWarning, new_msg + ) if is_np_func else nullcontext(): + return self._python_apply_general(f, self._selected_obj) return result diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 3bcb7d964fad1..faa89e556a01e 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1287,6 +1287,27 @@ def test_nuiscance_columns(): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("method", ["agg", "apply", "transform"]) +def test_numeric_only_warning_numpy(method): + # GH#50538 + df = DataFrame({"a": [1, 1, 2], "b": list("xyz")}) + if method == "agg": + msg = "The operation