DEPR: Silent dropping of nuisance columns in agg_list_like #43741


Merged 11 commits on Sep 29, 2021
DEPR: Silent dropping of columns in NDFrame.agg with list-like func
rhshadrach committed Sep 25, 2021
commit bc67672031ff1b1b51719ea5af65388a3eb09799
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
@@ -337,6 +337,7 @@ Other Deprecations
- Deprecated the ``squeeze`` argument to :meth:`read_csv`, :meth:`read_table`, and :meth:`read_excel`. Users should squeeze the DataFrame afterwards with ``.squeeze("columns")`` instead. (:issue:`43242`)
- Deprecated the ``index`` argument to :class:`SparseArray` construction (:issue:`23089`)
- Deprecated :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
- Deprecated silent dropping of columns that raised a ``TypeError`` or ``DataError`` in :class:`Series.aggregate` and :class:`DataFrame.aggregate` when used with a list (:issue:``)

.. ---------------------------------------------------------------------------

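An illustrative sketch of the behaviour this whatsnew entry describes, using column names that mirror the tests changed below (the exact warning text is taken from this PR):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "A": [1, 2, 3],
        "B": [1.0, 2.0, 3.0],
        "C": ["foo", "bar", "baz"],
        "D": pd.date_range("20130101", periods=3),
    }
)

# "sum" raises a TypeError on the datetime column. Previously 'D' was dropped
# from the result silently; with this deprecation the same call emits a
# FutureWarning along the lines of "['D'] did not aggregate successfully"
# before dropping it.
result = df.agg(["sum"])
print(result)
```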
21 changes: 18 additions & 3 deletions pandas/core/apply.py
@@ -33,6 +33,7 @@
FrameOrSeries,
)
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import is_nested_object
from pandas.core.dtypes.common import (
@@ -335,6 +336,7 @@ def agg_list_like(self) -> DataFrame | Series:

results = []
keys = []
failed_names = []

# degenerate case
if selected_obj.ndim == 1:
@@ -344,7 +346,7 @@ def agg_list_like(self) -> DataFrame | Series:
new_res = colg.aggregate(a)

except TypeError:
pass
failed_names.append(com.get_callable_name(a) or a)
else:
results.append(new_res)

@@ -358,10 +360,14 @@ def agg_list_like(self) -> DataFrame | Series:
for index, col in enumerate(selected_obj):
colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
try:
new_res = colg.aggregate(arg)
with warnings.catch_warnings(record=True) as w:
new_res = colg.aggregate(arg)
if len(w) > 0:

Member: Are all warnings that happen here going to represent a failure?

rhshadrach (Member Author): Woah - thanks. No, probably not. Will inspect the warnings themselves.

rhshadrach (Member Author), Sep 25, 2021: This got a bit hairy; I wasn't able to find a good way to capture and suppress only our warnings while passing through any other warnings and still being able to determine whether we raised. Supplying a filter within the catch_warnings context manager means we can't tell if we raised; using record=True there means all user warnings would also be suppressed.

If there are any suggestions for a better implementation, it would be appreciated.
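A minimal standalone sketch of the trade-off described here; `user_func` is a made-up stand-in for a user-supplied aggregation:

```python
import warnings

def user_func(values):
    # A user-supplied aggregation that legitimately warns.
    warnings.warn("a warning the caller probably wants to see", UserWarning)
    return sum(values)

with warnings.catch_warnings(record=True) as captured:
    warnings.simplefilter("always")
    user_func([1, 2, 3])

# The user's warning was captured (and therefore silenced) along with any
# pandas-internal ones; nothing was shown to the caller.
print([str(w.message) for w in captured])
```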

Member: What happens if you don't catch warnings at all here?

rhshadrach (Member Author): Running

import pandas as pd

def foo(x):
    return x.prod()

df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y'], 'c': ['z', 'w'], 'd': ['a', 'b']})
df.agg([foo, lambda x: x.sum()])

gives

FutureWarning: ['foo'] did not aggregate successfully.
FutureWarning: ['foo'] did not aggregate successfully.
FutureWarning: ['foo'] did not aggregate successfully.
Member: I think there's something like a filter="once" to prevent exactly that (and I think it's the default).
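For reference, a small standalone check of the deduplication being referred to; under Python's stock filters an identical warning from the same call site is only displayed once:

```python
import warnings

def emit():
    warnings.warn("did not aggregate successfully", FutureWarning)

# Only the first call prints under the default filters; repeated messages
# therefore suggest something is resetting the warning registry.
for _ in range(3):
    emit()
```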

rhshadrach (Member Author): I see - thanks. Digging into this, the only explanation I can find for getting multiple warnings is https://bugs.python.org/issue29672. I haven't confirmed yet whether the code within agg ever hits a separate catch_warnings; if it does, that will cause repeated messages.

rhshadrach (Member Author): I'm still going to verify that the use of another catch_warnings (in a different part of the code) is the cause of the multiple messages I see without the special handling in this PR.

rhshadrach (Member Author), Sep 30, 2021: I've confirmed it is because of the Python bug mentioned above. The code in the sample hits pandas.core.indexes.base.Index._with_infer, which uses the catch_warnings context manager. Here is an MWE to demonstrate the bug (mirroring the structure of agg_list_like):

import warnings

def foo():
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", ".*the Index constructor", FutureWarning)


def bar(ndim):
    if ndim == 1:
        pass
    else:
        for _ in range(3):
            bar(1)

    warnings.warn('test warning', FutureWarning)
    foo()

bar(2)

With the call to foo from within bar, three warning messages are printed. When the call to foo is removed, only one is printed.

With this, are we okay with catching the warnings as was merged @jbrockmendel?

Member: I'm fine with it. A comment to this effect might be worthwhile.
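A self-contained sketch of the pattern being merged, with a comment of the sort suggested above; `aggregate_one_column` is a made-up stand-in for `colg.aggregate(arg)`:

```python
import warnings

def aggregate_one_column():
    # Stand-in for colg.aggregate(arg): a nested aggregation may itself warn
    # and may use catch_warnings internally, which can defeat the usual
    # once-per-location deduplication (https://bugs.python.org/issue29672).
    warnings.warn("['foo'] did not aggregate successfully", FutureWarning)
    return 42

failed_names = []
col = "B"

# record=True both captures and suppresses warnings from the nested call, so
# duplicated messages never reach the user; a non-empty record marks the
# column as (partially) failed.
with warnings.catch_warnings(record=True) as record:
    new_res = aggregate_one_column()
if len(record) > 0:
    failed_names.append(col)

print(new_res, failed_names)  # 42 ['B']
```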

failed_names.append(col)
except (TypeError, DataError):
pass
failed_names.append(col)
except ValueError as err:
failed_names.append(col)
# cannot aggregate
if "Must produce aggregated value" in str(err):
# raised directly in _aggregate_named
@@ -384,6 +390,15 @@ def agg_list_like(self) -> DataFrame | Series:
if not len(results):
raise ValueError("no results")

if len(failed_names) > 0:
warnings.warn(
f"{failed_names} did not aggregate successfully. If any error is "
f"raised this will raise in a future version of pandas. "
f"Drop these columns/ops to avoid this warning.",
FutureWarning,
stacklevel=find_stack_level(),
)

try:
concatenated = concat(results, keys=keys, axis=1, sort=False)
except TypeError as err:
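A hedged example of the degenerate (ndim == 1) branch above; the warning text is taken from this PR, the data is illustrative:

```python
import pandas as pd

def foo(x):
    return x.prod()

s = pd.Series(["x", "y", "z"])

# foo cannot take the product of strings, while "sum" concatenates them.
# The failing entry is reported by callable name: expect a FutureWarning
# roughly like "['foo'] did not aggregate successfully", and a result that
# still contains the entries that worked.
result = s.agg([foo, "sum"])
print(result)
```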
18 changes: 14 additions & 4 deletions pandas/tests/apply/test_frame_apply.py
@@ -1087,12 +1087,16 @@ def test_agg_multiple_mixed_no_warning():
index=["min", "sum"],
)
# sorted index
with tm.assert_produces_warning(None):
with tm.assert_produces_warning(
FutureWarning, match=r"\['D'\] did not aggregate successfully"
):
result = mdf.agg(["min", "sum"])

tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(None):
with tm.assert_produces_warning(
FutureWarning, match=r"\['D'\] did not aggregate successfully"
):
result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"])

# GH40420: the result of .agg should have an index that is sorted
@@ -1201,7 +1205,10 @@ def test_nuiscance_columns():
expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"])
tm.assert_series_equal(result, expected)

result = df.agg(["sum"])
with tm.assert_produces_warning(
FutureWarning, match=r"\['D'\] did not aggregate successfully"
):
result = df.agg(["sum"])
expected = DataFrame(
[[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"]
)
@@ -1433,7 +1440,10 @@ def foo(s):
return s.sum() / 2

aggs = ["sum", foo, "count", "min"]
result = df.agg(aggs)
with tm.assert_produces_warning(
FutureWarning, match=r"\['item'\] did not aggregate successfully"
):
result = df.agg(aggs)
expected = DataFrame(
{
"item": ["123456", np.nan, 6, "1"],
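For downstream code that cannot immediately drop the failing columns/ops, one possible way to silence just this warning (a sketch; the message pattern is assumed from the text introduced in this PR):

```python
import warnings

# Ignore only the new aggregation deprecation warning; other FutureWarnings
# are still shown.
warnings.filterwarnings(
    "ignore",
    message=r".*did not aggregate successfully",
    category=FutureWarning,
)
```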
10 changes: 8 additions & 2 deletions pandas/tests/groupby/aggregate/test_aggregate.py
@@ -339,8 +339,14 @@ def test_multiple_functions_tuples_and_non_tuples(df):
expected = df.groupby("A")["C"].agg(ex_funcs)
tm.assert_frame_equal(result, expected)

result = df.groupby("A").agg(funcs)
expected = df.groupby("A").agg(ex_funcs)
with tm.assert_produces_warning(
FutureWarning, match=r"\['B'\] did not aggregate successfully"
):
result = df.groupby("A").agg(funcs)
with tm.assert_produces_warning(
FutureWarning, match=r"\['B'\] did not aggregate successfully"
):
expected = df.groupby("A").agg(ex_funcs)
tm.assert_frame_equal(result, expected)


8 changes: 6 additions & 2 deletions pandas/tests/groupby/aggregate/test_other.py
@@ -45,13 +45,17 @@ def peak_to_peak(arr):
return arr.max() - arr.min()

with tm.assert_produces_warning(
FutureWarning, match="Dropping invalid", check_stacklevel=False
FutureWarning,
match=r"\['key2'\] did not aggregate successfully",
check_stacklevel=False,
):
expected = grouped.agg([peak_to_peak])
expected.columns = ["data1", "data2"]

with tm.assert_produces_warning(
FutureWarning, match="Dropping invalid", check_stacklevel=False
FutureWarning,
match=r"\['key2'\] did not aggregate successfully",
check_stacklevel=False,
):
result = grouped.agg(peak_to_peak)
tm.assert_frame_equal(result, expected)
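A hedged groupby example mirroring the peak_to_peak test above (the data is illustrative; the warning text comes from this PR):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "key1": ["a", "a", "b", "b"],
        "key2": ["one", "two", "one", "two"],
        "data1": np.arange(4.0),
        "data2": np.arange(4.0, 8.0),
    }
)

def peak_to_peak(arr):
    return arr.max() - arr.min()

# Subtracting strings raises on 'key2', so the aggregation now warns
# (roughly "['key2'] did not aggregate successfully") while still returning
# peak-to-peak values for the numeric columns.
result = df.groupby("key1").agg([peak_to_peak])
print(result)
```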
5 changes: 4 additions & 1 deletion pandas/tests/groupby/test_groupby.py
@@ -583,7 +583,10 @@ def test_frame_multi_key_function_list():

grouped = data.groupby(["A", "B"])
funcs = [np.mean, np.std]
agged = grouped.agg(funcs)
with tm.assert_produces_warning(
FutureWarning, match=r"\['C'\] did not aggregate successfully"
):
agged = grouped.agg(funcs)
expected = pd.concat(
[grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)],
keys=["D", "E", "F"],
4 changes: 3 additions & 1 deletion pandas/tests/resample/test_resample_api.py
@@ -352,7 +352,9 @@ def test_agg():
for t in cases:
warn = FutureWarning if t in cases[1:3] else None
with tm.assert_produces_warning(
warn, match="Dropping invalid columns", check_stacklevel=False
warn,
match=r"\['date'\] did not aggregate successfully",
check_stacklevel=False,
):
# .var on dt64 column raises and is dropped
result = t.aggregate([np.mean, np.std])
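A hedged resample example corresponding to the warning case above (frequency and data are illustrative; the warning text comes from this PR):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "date": pd.date_range("2021-01-01", periods=10, freq="D"),
        "a": np.arange(10.0),
    }
)

# np.std raises on the datetime column during the resampled aggregation, so
# the call now warns (roughly "['date'] did not aggregate successfully")
# instead of dropping that column silently.
result = df.resample("A", on="date").agg([np.mean, np.std])
print(result)
```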