Skip to content

TST: parametrize cumulative tests #44214

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 20 additions & 77 deletions pandas/tests/frame/test_cumulative.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""

import numpy as np
import pytest

from pandas import (
DataFrame,
Expand All @@ -19,53 +20,22 @@ class TestDataFrameCumulativeOps:
# ---------------------------------------------------------------------
# Cumulative Operations - cumsum, cummax, ...

def test_cumsum_corner(self):
dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5))
# TODO(wesm): do something with this?
result = dm.cumsum() # noqa

def test_cumsum(self, datetime_frame):
datetime_frame.iloc[5:10, 0] = np.nan
datetime_frame.iloc[10:15, 1] = np.nan
datetime_frame.iloc[15:, 2] = np.nan

# axis = 0
cumsum = datetime_frame.cumsum()
expected = datetime_frame.apply(Series.cumsum)
tm.assert_frame_equal(cumsum, expected)

# axis = 1
cumsum = datetime_frame.cumsum(axis=1)
expected = datetime_frame.apply(Series.cumsum, axis=1)
tm.assert_frame_equal(cumsum, expected)

# works
def test_cumulative_ops_smoke(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These smoke tests are not very useful tests.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep. I'd be OK with getting rid of them.

# it works
df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
df.cummax()
df.cummin()
df.cumsum()

# fix issue
cumsum_xs = datetime_frame.cumsum(axis=1)
assert np.shape(cumsum_xs) == np.shape(datetime_frame)
dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5))
# TODO(wesm): do something with this?
dm.cumsum()

def test_cumprod(self, datetime_frame):
def test_cumprod_smoke(self, datetime_frame):
datetime_frame.iloc[5:10, 0] = np.nan
datetime_frame.iloc[10:15, 1] = np.nan
datetime_frame.iloc[15:, 2] = np.nan

# axis = 0
cumprod = datetime_frame.cumprod()
expected = datetime_frame.apply(Series.cumprod)
tm.assert_frame_equal(cumprod, expected)

# axis = 1
cumprod = datetime_frame.cumprod(axis=1)
expected = datetime_frame.apply(Series.cumprod, axis=1)
tm.assert_frame_equal(cumprod, expected)

# fix issue
cumprod_xs = datetime_frame.cumprod(axis=1)
assert np.shape(cumprod_xs) == np.shape(datetime_frame)

# ints
df = datetime_frame.fillna(0).astype(int)
df.cumprod(0)
Expand All @@ -76,53 +46,26 @@ def test_cumprod(self, datetime_frame):
df.cumprod(0)
df.cumprod(1)

def test_cummin(self, datetime_frame):
datetime_frame.iloc[5:10, 0] = np.nan
datetime_frame.iloc[10:15, 1] = np.nan
datetime_frame.iloc[15:, 2] = np.nan

# axis = 0
cummin = datetime_frame.cummin()
expected = datetime_frame.apply(Series.cummin)
tm.assert_frame_equal(cummin, expected)

# axis = 1
cummin = datetime_frame.cummin(axis=1)
expected = datetime_frame.apply(Series.cummin, axis=1)
tm.assert_frame_equal(cummin, expected)

# it works
df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
df.cummin()

# fix issue
cummin_xs = datetime_frame.cummin(axis=1)
assert np.shape(cummin_xs) == np.shape(datetime_frame)

def test_cummax(self, datetime_frame):
@pytest.mark.parametrize("method", ["cumsum", "cumprod", "cummin", "cummax"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe a fixture for this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe if we start needing it in more than one place

def test_cumulative_ops_match_series_apply(self, datetime_frame, method):
datetime_frame.iloc[5:10, 0] = np.nan
datetime_frame.iloc[10:15, 1] = np.nan
datetime_frame.iloc[15:, 2] = np.nan

# axis = 0
cummax = datetime_frame.cummax()
expected = datetime_frame.apply(Series.cummax)
tm.assert_frame_equal(cummax, expected)
result = getattr(datetime_frame, method)()
expected = datetime_frame.apply(getattr(Series, method))
tm.assert_frame_equal(result, expected)

# axis = 1
cummax = datetime_frame.cummax(axis=1)
expected = datetime_frame.apply(Series.cummax, axis=1)
tm.assert_frame_equal(cummax, expected)

# it works
df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
df.cummax()
result = getattr(datetime_frame, method)(axis=1)
expected = datetime_frame.apply(getattr(Series, method), axis=1)
tm.assert_frame_equal(result, expected)

# fix issue
cummax_xs = datetime_frame.cummax(axis=1)
assert np.shape(cummax_xs) == np.shape(datetime_frame)
# fix issue TODO: GH ref?
assert np.shape(result) == np.shape(datetime_frame)

def test_cumulative_ops_preserve_dtypes(self):
def test_cumsum_preserve_dtypes(self):
# GH#19296 dont incorrectly upcast to object
df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3.0], "C": [True, False, False]})

Expand Down
160 changes: 59 additions & 101 deletions pandas/tests/series/test_cumulative.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,20 @@
--------
tests.frame.test_cumulative
"""
from itertools import product

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm

methods = {
"cumsum": np.cumsum,
"cumprod": np.cumprod,
"cummin": np.minimum.accumulate,
"cummax": np.maximum.accumulate,
}


def _check_accum_op(name, series, check_dtype=True):
func = getattr(np, name)
Expand All @@ -37,136 +43,88 @@ def test_cumsum(self, datetime_series):
def test_cumprod(self, datetime_series):
_check_accum_op("cumprod", datetime_series)

def test_cummin(self, datetime_series):
tm.assert_numpy_array_equal(
datetime_series.cummin().values,
np.minimum.accumulate(np.array(datetime_series)),
)
ts = datetime_series.copy()
ts[::2] = np.NaN
result = ts.cummin()[1::2]
expected = np.minimum.accumulate(ts.dropna())
@pytest.mark.parametrize("method", ["cummin", "cummax"])
def test_cummin_cummax(self, datetime_series, method):
ufunc = methods[method]

result.index = result.index._with_freq(None)
tm.assert_series_equal(result, expected)
result = getattr(datetime_series, method)().values
expected = ufunc(np.array(datetime_series))

def test_cummax(self, datetime_series):
tm.assert_numpy_array_equal(
datetime_series.cummax().values,
np.maximum.accumulate(np.array(datetime_series)),
)
tm.assert_numpy_array_equal(result, expected)
ts = datetime_series.copy()
ts[::2] = np.NaN
result = ts.cummax()[1::2]
expected = np.maximum.accumulate(ts.dropna())
result = getattr(ts, method)()[1::2]
expected = ufunc(ts.dropna())

result.index = result.index._with_freq(None)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("tz", [None, "US/Pacific"])
def test_cummin_datetime64(self, tz):
s = pd.Series(
pd.to_datetime(
["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]
).tz_localize(tz)
)

expected = pd.Series(
pd.to_datetime(
["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-1"]
).tz_localize(tz)
)
result = s.cummin(skipna=True)
@pytest.mark.parametrize(
"ts",
[
pd.Timedelta(0),
pd.Timestamp("1999-12-31"),
pd.Timestamp("1999-12-31").tz_localize("US/Pacific"),
],
)
def test_cummin_cummax_datetimelike(self, ts):
# with ts==pd.Timedelta(0), we are testing td64; with naive Timestamp
# we are testing datetime64[ns]; with Timestamp[US/Pacific]
# we are testing dt64tz
tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "1 days", "NaT", "3 days"])
ser = pd.Series(tdi + ts)

exp_tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "2 days", "NaT", "3 days"])
expected = pd.Series(exp_tdi + ts)
result = ser.cummax(skipna=True)
tm.assert_series_equal(expected, result)

expected = pd.Series(
pd.to_datetime(
["NaT", "2000-1-2", "2000-1-2", "2000-1-1", "2000-1-1", "2000-1-1"]
).tz_localize(tz)
)
result = s.cummin(skipna=False)
exp_tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "1 days", "NaT", "1 days"])
expected = pd.Series(exp_tdi + ts)
result = ser.cummin(skipna=True)
tm.assert_series_equal(expected, result)

@pytest.mark.parametrize("tz", [None, "US/Pacific"])
def test_cummax_datetime64(self, tz):
s = pd.Series(
pd.to_datetime(
["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]
).tz_localize(tz)
exp_tdi = pd.to_timedelta(
["NaT", "2 days", "2 days", "2 days", "2 days", "3 days"]
)

expected = pd.Series(
pd.to_datetime(
["NaT", "2000-1-2", "NaT", "2000-1-2", "NaT", "2000-1-3"]
).tz_localize(tz)
)
result = s.cummax(skipna=True)
expected = pd.Series(exp_tdi + ts)
result = ser.cummax(skipna=False)
tm.assert_series_equal(expected, result)

expected = pd.Series(
pd.to_datetime(
["NaT", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-3"]
).tz_localize(tz)
exp_tdi = pd.to_timedelta(
["NaT", "2 days", "2 days", "1 days", "1 days", "1 days"]
)
result = s.cummax(skipna=False)
expected = pd.Series(exp_tdi + ts)
result = ser.cummin(skipna=False)
tm.assert_series_equal(expected, result)

def test_cummin_timedelta64(self):
s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"]))
def test_cummethods_bool(self):
# GH#6270
# checking Series method vs the ufunc applied to the values

expected = pd.Series(
pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "1 min"])
)
result = s.cummin(skipna=True)
tm.assert_series_equal(expected, result)
a = pd.Series([False, False, False, True, True, False, False])
c = pd.Series([False] * len(a))

expected = pd.Series(
pd.to_timedelta(["NaT", "2 min", "2 min", "1 min", "1 min", "1 min"])
)
result = s.cummin(skipna=False)
tm.assert_series_equal(expected, result)
for method in methods:
for ser in [a, ~a, c, ~c]:
ufunc = methods[method]

def test_cummax_timedelta64(self):
s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"]))
exp_vals = ufunc(ser.values)
expected = pd.Series(exp_vals)

expected = pd.Series(
pd.to_timedelta(["NaT", "2 min", "NaT", "2 min", "NaT", "3 min"])
)
result = s.cummax(skipna=True)
tm.assert_series_equal(expected, result)
result = getattr(ser, method)()

expected = pd.Series(
pd.to_timedelta(["NaT", "2 min", "2 min", "2 min", "2 min", "3 min"])
)
result = s.cummax(skipna=False)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(result, expected)

def test_cummethods_bool(self):
# GH#6270
def test_cummethods_bool_in_object_dtype(self):

a = pd.Series([False, False, False, True, True, False, False])
b = ~a
c = pd.Series([False] * len(b))
d = ~c
methods = {
"cumsum": np.cumsum,
"cumprod": np.cumprod,
"cummin": np.minimum.accumulate,
"cummax": np.maximum.accumulate,
}
args = product((a, b, c, d), methods)
for s, method in args:
expected = pd.Series(methods[method](s.values))
result = getattr(s, method)()
tm.assert_series_equal(result, expected)

e = pd.Series([False, True, np.nan, False])
ser = pd.Series([False, True, np.nan, False])
cse = pd.Series([0, 1, np.nan, 1], dtype=object)
cpe = pd.Series([False, 0, np.nan, 0])
cmin = pd.Series([False, False, np.nan, False])
cmax = pd.Series([False, True, np.nan, True])
expecteds = {"cumsum": cse, "cumprod": cpe, "cummin": cmin, "cummax": cmax}

for method in methods:
res = getattr(e, method)()
res = getattr(ser, method)()
tm.assert_series_equal(res, expecteds[method])