Skip to content

Commit

Permalink
BUG: Fix Rolling where duplicate datetimelike indexes are treated as …
Browse files Browse the repository at this point in the history
…consecutive rather than equal with closed='left' and closed='neither' (#54917)

* Add bugfix for rolling window with nonunique datetimelike index

* Run black

* Add entry to whatsnew

* Fix VariableOffsetWindowIndexer

* Simplify change in indexers.pyx

* Add test
  • Loading branch information
ihsansecer authored Sep 5, 2023
1 parent e1ec244 commit e7a1f9d
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 17 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ Performance improvements
Bug fixes
~~~~~~~~~
- Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`)
- Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`)

Categorical
^^^^^^^^^^^
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/window/indexers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ def calculate_variable_window_bounds(
break
# end bound is previous end
# or current index
elif index[end[i - 1]] == end_bound and not right_closed:
end[i] = end[i - 1] + 1
elif (index[end[i - 1]] - end_bound) * index_growth_sign <= 0:
end[i] = i + 1
else:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/indexers/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,9 @@ def get_window_bounds(
# end bound is previous end
# or current index
end_diff = (self.index[end[i - 1]] - end_bound) * index_growth_sign
if end_diff <= zero:
if end_diff == zero and not right_closed:
end[i] = end[i - 1] + 1
elif end_diff <= zero:
end[i] = i + 1
else:
end[i] = end[i - 1]
Expand Down
38 changes: 22 additions & 16 deletions pandas/tests/window/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,20 +466,23 @@ def test_groupby_rolling_subset_with_closed(self):
# GH 35549
df = DataFrame(
{
"column1": range(6),
"column2": range(6),
"group": 3 * ["A", "B"],
"date": [Timestamp("2019-01-01")] * 6,
"column1": range(8),
"column2": range(8),
"group": ["A"] * 4 + ["B"] * 4,
"date": [
Timestamp(date)
for date in ["2019-01-01", "2019-01-01", "2019-01-02", "2019-01-02"]
]
* 2,
}
)
result = (
df.groupby("group").rolling("1D", on="date", closed="left")["column1"].sum()
)
expected = Series(
[np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
index=MultiIndex.from_tuples(
[("A", Timestamp("2019-01-01"))] * 3
+ [("B", Timestamp("2019-01-01"))] * 3,
[np.nan, np.nan, 1.0, 1.0, np.nan, np.nan, 9.0, 9.0],
index=MultiIndex.from_frame(
df[["group", "date"]],
names=["group", "date"],
),
name="column1",
Expand All @@ -490,10 +493,14 @@ def test_groupby_subset_rolling_subset_with_closed(self):
# GH 35549
df = DataFrame(
{
"column1": range(6),
"column2": range(6),
"group": 3 * ["A", "B"],
"date": [Timestamp("2019-01-01")] * 6,
"column1": range(8),
"column2": range(8),
"group": ["A"] * 4 + ["B"] * 4,
"date": [
Timestamp(date)
for date in ["2019-01-01", "2019-01-01", "2019-01-02", "2019-01-02"]
]
* 2,
}
)

Expand All @@ -503,10 +510,9 @@ def test_groupby_subset_rolling_subset_with_closed(self):
.sum()
)
expected = Series(
[np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
index=MultiIndex.from_tuples(
[("A", Timestamp("2019-01-01"))] * 3
+ [("B", Timestamp("2019-01-01"))] * 3,
[np.nan, np.nan, 1.0, 1.0, np.nan, np.nan, 9.0, 9.0],
index=MultiIndex.from_frame(
df[["group", "date"]],
names=["group", "date"],
),
name="column1",
Expand Down
70 changes: 70 additions & 0 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,76 @@ def test_datetimelike_nonunique_index_centering(
tm.assert_equal(result, expected)


@pytest.mark.parametrize(
"closed,expected",
[
("left", [np.nan, np.nan, 1, 1, 1, 10, 14, 14, 18, 21]),
("neither", [np.nan, np.nan, 1, 1, 1, 9, 5, 5, 13, 8]),
("right", [0, 1, 3, 6, 10, 14, 11, 18, 21, 17]),
("both", [0, 1, 3, 6, 10, 15, 20, 27, 26, 30]),
],
)
def test_variable_window_nonunique(closed, expected, frame_or_series):
# GH 20712
index = DatetimeIndex(
[
"2011-01-01",
"2011-01-01",
"2011-01-02",
"2011-01-02",
"2011-01-02",
"2011-01-03",
"2011-01-04",
"2011-01-04",
"2011-01-05",
"2011-01-06",
]
)

df = frame_or_series(range(10), index=index, dtype=float)
expected = frame_or_series(expected, index=index, dtype=float)

result = df.rolling("2D", closed=closed).sum()

tm.assert_equal(result, expected)


@pytest.mark.parametrize(
"closed,expected",
[
("left", [np.nan, np.nan, 1, 1, 1, 10, 15, 15, 18, 21]),
("neither", [np.nan, np.nan, 1, 1, 1, 10, 15, 15, 13, 8]),
("right", [0, 1, 3, 6, 10, 15, 21, 28, 21, 17]),
("both", [0, 1, 3, 6, 10, 15, 21, 28, 26, 30]),
],
)
def test_variable_offset_window_nonunique(closed, expected, frame_or_series):
# GH 20712
index = DatetimeIndex(
[
"2011-01-01",
"2011-01-01",
"2011-01-02",
"2011-01-02",
"2011-01-02",
"2011-01-03",
"2011-01-04",
"2011-01-04",
"2011-01-05",
"2011-01-06",
]
)

df = frame_or_series(range(10), index=index, dtype=float)
expected = frame_or_series(expected, index=index, dtype=float)

offset = BusinessDay(2)
indexer = VariableOffsetWindowIndexer(index=index, offset=offset)
result = df.rolling(indexer, closed=closed, min_periods=1).sum()

tm.assert_equal(result, expected)


def test_even_number_window_alignment():
# see discussion in GH 38780
s = Series(range(3), index=date_range(start="2020-01-01", freq="D", periods=3))
Expand Down

0 comments on commit e7a1f9d

Please sign in to comment.