Skip to content

Commit

Permalink
Fix date_range(start, end, freq) when end-start is divisible by freq (
Browse files Browse the repository at this point in the history
#16516)

xref #16507

`date_range` generates its dates via `range`, and the number of periods in that range was calculated via `math.ceil((end - start) / freq)`. When `end - start` is exactly divisible by `freq`, the quotient is already a whole number, so `math.ceil` leaves it unchanged instead of rounding it up; the period count is then one short and the last date is dropped.

Instead, this PR uses `math.floor((end - start) / freq) + 1`, which ensures the last date is always captured.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: #16516
  • Loading branch information
mroeschke authored Aug 9, 2024
1 parent 16aa0ea commit 4cd87d3
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 6 deletions.
6 changes: 4 additions & 2 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2414,11 +2414,13 @@ def day_name(self, locale: str | None = None) -> Index:
>>> datetime_index = cudf.date_range("2016-12-31", "2017-01-08", freq="D")
>>> datetime_index
DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03',
'2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07'],
'2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07',
'2017-01-08'],
dtype='datetime64[ns]', freq='D')
>>> datetime_index.day_name()
Index(['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
'Friday', 'Saturday'], dtype='object')
'Friday', 'Saturday', 'Sunday'],
dtype='object')
"""
day_names = self._column.get_day_names(locale)
return Index._from_data({self.name: day_names})
Expand Down
3 changes: 3 additions & 0 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,14 +801,17 @@ def dt(self):
>>> s.dt.hour
0 12
1 13
2 14
dtype: int16
>>> s.dt.second
0 0
1 0
2 0
dtype: int16
>>> s.dt.day
0 3
1 3
2 3
dtype: int16
Returns
Expand Down
9 changes: 5 additions & 4 deletions python/cudf/cudf/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,17 +951,18 @@ def date_range(
end = cudf.Scalar(end, dtype=dtype)
_is_increment_sequence = end >= start

periods = math.ceil(
periods = math.floor(
int(end - start) / _offset_to_nanoseconds_lower_bound(offset)
)

if periods < 0:
# Mismatched sign between (end-start) and offset, return empty
# column
periods = 0
elif periods == 0:
# end == start, return exactly 1 timestamp (start)
periods = 1
else:
# If end == start, periods == 0 and we return exactly 1 timestamp (start).
# Otherwise, since closed="both", we ensure the end point is included.
periods += 1

# We compute `end_estim` (the estimated upper bound of the date
# range) below, but don't always use it. We do this to ensure
Expand Down
6 changes: 6 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2536,3 +2536,9 @@ def test_dti_methods(method, kwargs):
result = getattr(cudf_dti, method)(**kwargs)
expected = getattr(pd_dti, method)(**kwargs)
assert_eq(result, expected)


def test_date_range_start_end_divisible_by_freq():
    """Compare cudf and pandas ``date_range`` on an evenly divisible span.

    The one-day interval between ``start`` and ``end`` is an exact multiple
    of the hourly frequency; cudf must produce the same index as pandas.
    """
    start, end = "2011-01-01", "2011-01-02"
    from_cudf = cudf.date_range(start, end, freq="h")
    from_pandas = pd.date_range(start, end, freq="h")
    assert_eq(from_cudf, from_pandas)

0 comments on commit 4cd87d3

Please sign in to comment.