pandas-dev · jreback · Oct 29, 2021 · Oct 28, 2021 · jreback · Oct 29, 2021
diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py
@@ -7,6 +7,7 @@
 """
 
 import numpy as np
+import pytest
 
 from pandas import (
     DataFrame,
@@ -19,53 +20,22 @@ class TestDataFrameCumulativeOps:
     # ---------------------------------------------------------------------
     # Cumulative Operations - cumsum, cummax, ...
 
-    def test_cumsum_corner(self):
-        dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5))
-        # TODO(wesm): do something with this?
-        result = dm.cumsum()  # noqa
-
-    def test_cumsum(self, datetime_frame):
-        datetime_frame.iloc[5:10, 0] = np.nan
-        datetime_frame.iloc[10:15, 1] = np.nan
-        datetime_frame.iloc[15:, 2] = np.nan
-
-        # axis = 0
-        cumsum = datetime_frame.cumsum()
-        expected = datetime_frame.apply(Series.cumsum)
-        tm.assert_frame_equal(cumsum, expected)
-
-        # axis = 1
-        cumsum = datetime_frame.cumsum(axis=1)
-        expected = datetime_frame.apply(Series.cumsum, axis=1)
-        tm.assert_frame_equal(cumsum, expected)
-
-        # works
+    def test_cumulative_ops_smoke(self):
+        # smoke test: each cumulative op should run without raising
         df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
+        df.cummax()
+        df.cummin()
         df.cumsum()
 
-        # fix issue
-        cumsum_xs = datetime_frame.cumsum(axis=1)
-        assert np.shape(cumsum_xs) == np.shape(datetime_frame)
+        dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5))
+        # TODO(wesm): do something with this?
+        dm.cumsum()
 
-    def test_cumprod(self, datetime_frame):
+    def test_cumprod_smoke(self, datetime_frame):
         datetime_frame.iloc[5:10, 0] = np.nan
         datetime_frame.iloc[10:15, 1] = np.nan
         datetime_frame.iloc[15:, 2] = np.nan
 
-        # axis = 0
-        cumprod = datetime_frame.cumprod()
-        expected = datetime_frame.apply(Series.cumprod)
-        tm.assert_frame_equal(cumprod, expected)
-
-        # axis = 1
-        cumprod = datetime_frame.cumprod(axis=1)
-        expected = datetime_frame.apply(Series.cumprod, axis=1)
-        tm.assert_frame_equal(cumprod, expected)
-
-        # fix issue
-        cumprod_xs = datetime_frame.cumprod(axis=1)
-        assert np.shape(cumprod_xs) == np.shape(datetime_frame)
-
         # ints
         df = datetime_frame.fillna(0).astype(int)
         df.cumprod(0)
@@ -76,53 +46,26 @@ def test_cumprod(self, datetime_frame):
         df.cumprod(0)
         df.cumprod(1)
 
-    def test_cummin(self, datetime_frame):
-        datetime_frame.iloc[5:10, 0] = np.nan
-        datetime_frame.iloc[10:15, 1] = np.nan
-        datetime_frame.iloc[15:, 2] = np.nan
-
-        # axis = 0
-        cummin = datetime_frame.cummin()
-        expected = datetime_frame.apply(Series.cummin)
-        tm.assert_frame_equal(cummin, expected)
-
-        # axis = 1
-        cummin = datetime_frame.cummin(axis=1)
-        expected = datetime_frame.apply(Series.cummin, axis=1)
-        tm.assert_frame_equal(cummin, expected)
-
-        # it works
-        df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
-        df.cummin()
-
-        # fix issue
-        cummin_xs = datetime_frame.cummin(axis=1)
-        assert np.shape(cummin_xs) == np.shape(datetime_frame)
-
-    def test_cummax(self, datetime_frame):
+    @pytest.mark.parametrize("method", ["cumsum", "cumprod", "cummin", "cummax"])
+    def test_cumulative_ops_match_series_apply(self, datetime_frame, method):
         datetime_frame.iloc[5:10, 0] = np.nan
         datetime_frame.iloc[10:15, 1] = np.nan
         datetime_frame.iloc[15:, 2] = np.nan
 
         # axis = 0
-        cummax = datetime_frame.cummax()
-        expected = datetime_frame.apply(Series.cummax)
-        tm.assert_frame_equal(cummax, expected)
+        result = getattr(datetime_frame, method)()
+        expected = datetime_frame.apply(getattr(Series, method))
+        tm.assert_frame_equal(result, expected)
 
         # axis = 1
-        cummax = datetime_frame.cummax(axis=1)
-        expected = datetime_frame.apply(Series.cummax, axis=1)
-        tm.assert_frame_equal(cummax, expected)
-
-        # it works
-        df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
-        df.cummax()
+        result = getattr(datetime_frame, method)(axis=1)
+        expected = datetime_frame.apply(getattr(Series, method), axis=1)
+        tm.assert_frame_equal(result, expected)
 
-        # fix issue
-        cummax_xs = datetime_frame.cummax(axis=1)
-        assert np.shape(cummax_xs) == np.shape(datetime_frame)
+        # result shape must match the input frame (TODO: add GH issue ref)
+        assert np.shape(result) == np.shape(datetime_frame)
 
-    def test_cumulative_ops_preserve_dtypes(self):
+    def test_cumsum_preserve_dtypes(self):
         # GH#19296 dont incorrectly upcast to object
         df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3.0], "C": [True, False, False]})
 

diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py
@@ -5,14 +5,20 @@
 --------
 tests.frame.test_cumulative
 """
-from itertools import product
 
 import numpy as np
 import pytest
 
 import pandas as pd
 import pandas._testing as tm
 
+methods = {
+    "cumsum": np.cumsum,
+    "cumprod": np.cumprod,
+    "cummin": np.minimum.accumulate,
+    "cummax": np.maximum.accumulate,
+}
+
 
 def _check_accum_op(name, series, check_dtype=True):
     func = getattr(np, name)
@@ -37,136 +43,88 @@ def test_cumsum(self, datetime_series):
     def test_cumprod(self, datetime_series):
         _check_accum_op("cumprod", datetime_series)
 
-    def test_cummin(self, datetime_series):
-        tm.assert_numpy_array_equal(
-            datetime_series.cummin().values,
-            np.minimum.accumulate(np.array(datetime_series)),
-        )
-        ts = datetime_series.copy()
-        ts[::2] = np.NaN
-        result = ts.cummin()[1::2]
-        expected = np.minimum.accumulate(ts.dropna())
+    @pytest.mark.parametrize("method", ["cummin", "cummax"])
+    def test_cummin_cummax(self, datetime_series, method):
+        ufunc = methods[method]
 
-        result.index = result.index._with_freq(None)
-        tm.assert_series_equal(result, expected)
+        result = getattr(datetime_series, method)().values
+        expected = ufunc(np.array(datetime_series))
 
-    def test_cummax(self, datetime_series):
-        tm.assert_numpy_array_equal(
-            datetime_series.cummax().values,
-            np.maximum.accumulate(np.array(datetime_series)),
-        )
+        tm.assert_numpy_array_equal(result, expected)
         ts = datetime_series.copy()
         ts[::2] = np.NaN
-        result = ts.cummax()[1::2]
-        expected = np.maximum.accumulate(ts.dropna())
+        result = getattr(ts, method)()[1::2]
+        expected = ufunc(ts.dropna())
 
         result.index = result.index._with_freq(None)
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.parametrize("tz", [None, "US/Pacific"])
-    def test_cummin_datetime64(self, tz):
-        s = pd.Series(
-            pd.to_datetime(
-                ["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]
-            ).tz_localize(tz)
-        )
-
-        expected = pd.Series(
-            pd.to_datetime(
-                ["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-1"]
-            ).tz_localize(tz)
-        )
-        result = s.cummin(skipna=True)
+    @pytest.mark.parametrize(
+        "ts",
+        [
+            pd.Timedelta(0),
+            pd.Timestamp("1999-12-31"),
+            pd.Timestamp("1999-12-31").tz_localize("US/Pacific"),
+        ],
+    )
+    def test_cummin_cummax_datetimelike(self, ts):
+        # ``ts`` picks the dtype under test: with pd.Timedelta(0) the Series is
+        #  td64; with a naive Timestamp it is datetime64[ns]; with a
+        #  US/Pacific-localized Timestamp it is dt64tz
+        tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "1 days", "NaT", "3 days"])
+        ser = pd.Series(tdi + ts)
+
+        exp_tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "2 days", "NaT", "3 days"])
+        expected = pd.Series(exp_tdi + ts)
+        result = ser.cummax(skipna=True)
         tm.assert_series_equal(expected, result)
 
-        expected = pd.Series(
-            pd.to_datetime(
-                ["NaT", "2000-1-2", "2000-1-2", "2000-1-1", "2000-1-1", "2000-1-1"]
-            ).tz_localize(tz)
-        )
-        result = s.cummin(skipna=False)
+        exp_tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "1 days", "NaT", "1 days"])
+        expected = pd.Series(exp_tdi + ts)
+        result = ser.cummin(skipna=True)
         tm.assert_series_equal(expected, result)
 
-    @pytest.mark.parametrize("tz", [None, "US/Pacific"])
-    def test_cummax_datetime64(self, tz):
-        s = pd.Series(
-            pd.to_datetime(
-                ["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]
-            ).tz_localize(tz)
+        exp_tdi = pd.to_timedelta(
+            ["NaT", "2 days", "2 days", "2 days", "2 days", "3 days"]
         )
-
-        expected = pd.Series(
-            pd.to_datetime(
-                ["NaT", "2000-1-2", "NaT", "2000-1-2", "NaT", "2000-1-3"]
-            ).tz_localize(tz)
-        )
-        result = s.cummax(skipna=True)
+        expected = pd.Series(exp_tdi + ts)
+        result = ser.cummax(skipna=False)
         tm.assert_series_equal(expected, result)
 
-        expected = pd.Series(
-            pd.to_datetime(
-                ["NaT", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-3"]
-            ).tz_localize(tz)
+        exp_tdi = pd.to_timedelta(
+            ["NaT", "2 days", "2 days", "1 days", "1 days", "1 days"]
         )
-        result = s.cummax(skipna=False)
+        expected = pd.Series(exp_tdi + ts)
+        result = ser.cummin(skipna=False)
         tm.assert_series_equal(expected, result)
 
-    def test_cummin_timedelta64(self):
-        s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"]))
+    def test_cummethods_bool(self):
+        # GH#6270
+        # checking Series method vs the ufunc applied to the values
 
-        expected = pd.Series(
-            pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "1 min"])
-        )
-        result = s.cummin(skipna=True)
-        tm.assert_series_equal(expected, result)
+        a = pd.Series([False, False, False, True, True, False, False])
+        c = pd.Series([False] * len(a))
 
-        expected = pd.Series(
-            pd.to_timedelta(["NaT", "2 min", "2 min", "1 min", "1 min", "1 min"])
-        )
-        result = s.cummin(skipna=False)
-        tm.assert_series_equal(expected, result)
+        for method in methods:
+            for ser in [a, ~a, c, ~c]:
+                ufunc = methods[method]
 
-    def test_cummax_timedelta64(self):
-        s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"]))
+                exp_vals = ufunc(ser.values)
+                expected = pd.Series(exp_vals)
 
-        expected = pd.Series(
-            pd.to_timedelta(["NaT", "2 min", "NaT", "2 min", "NaT", "3 min"])
-        )
-        result = s.cummax(skipna=True)
-        tm.assert_series_equal(expected, result)
+                result = getattr(ser, method)()
 
-        expected = pd.Series(
-            pd.to_timedelta(["NaT", "2 min", "2 min", "2 min", "2 min", "3 min"])
-        )
-        result = s.cummax(skipna=False)
-        tm.assert_series_equal(expected, result)
+                tm.assert_series_equal(result, expected)
 
-    def test_cummethods_bool(self):
-        # GH#6270
+    def test_cummethods_bool_in_object_dtype(self):
 
-        a = pd.Series([False, False, False, True, True, False, False])
-        b = ~a
-        c = pd.Series([False] * len(b))
-        d = ~c
-        methods = {
-            "cumsum": np.cumsum,
-            "cumprod": np.cumprod,
-            "cummin": np.minimum.accumulate,
-            "cummax": np.maximum.accumulate,
-        }
-        args = product((a, b, c, d), methods)
-        for s, method in args:
-            expected = pd.Series(methods[method](s.values))
-            result = getattr(s, method)()
-            tm.assert_series_equal(result, expected)
-
-        e = pd.Series([False, True, np.nan, False])
+        ser = pd.Series([False, True, np.nan, False])
         cse = pd.Series([0, 1, np.nan, 1], dtype=object)
         cpe = pd.Series([False, 0, np.nan, 0])
         cmin = pd.Series([False, False, np.nan, False])
         cmax = pd.Series([False, True, np.nan, True])
         expecteds = {"cumsum": cse, "cumprod": cpe, "cummin": cmin, "cummax": cmax}
 
         for method in methods:
-            res = getattr(e, method)()
+            res = getattr(ser, method)()
             tm.assert_series_equal(res, expecteds[method])