Skip to content

BUG: Fix a bug in 'timedelta_range' that produced an extra point on an edge case (fix #30353) #33498

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
May 9, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
CLN: clean timedelta_range and date_range
  • Loading branch information
hasB4K committed May 9, 2020
commit bd7e802c8d596743d51fb05ac1349b83d03153af
67 changes: 16 additions & 51 deletions pandas/core/arrays/_ranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,64 +3,30 @@
(and possibly TimedeltaArray/PeriodArray)
"""

from typing import Tuple
from typing import Union

import numpy as np

from pandas._libs.tslibs import OutOfBoundsDatetime, Timedelta, Timestamp

from pandas.tseries.offsets import DateOffset, Tick, generate_range
from pandas.tseries.offsets import DateOffset


def generate_timestamps_range(
start: Timestamp, end: Timestamp, periods: int, freq: DateOffset
) -> Tuple[np.ndarray, str]:
"""
Generate a range of dates with the spans between dates described by
the given `freq` DateOffset.

Parameters
----------
start : Timestamp or None
first point of produced date range
end : Timestamp or None
last point of produced date range
periods : int
number of periods in produced date range
freq : DateOffset
describes space between dates in produced date range

Returns
-------
(tuple): containing:

values : ndarray[np.int64] representing nanosecond unix timestamps
tz : the timezone of the range
"""
if isinstance(freq, Tick):
start_value = Timestamp(start).value if start is not None else None
end_value = Timestamp(end).value if end is not None else None
values = _generate_regular_range(start_value, end_value, periods, freq.nanos)
else:
xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
values = np.array([x.value for x in xdr], dtype=np.int64)

tz = start.tz if start is not None else end.tz
return values, tz


def generate_timedeltas_range(
start: Timedelta, end: Timedelta, periods: int, freq: DateOffset
def generate_regular_range(
start: Union[Timestamp, Timedelta],
end: Union[Timestamp, Timedelta],
periods: int,
freq: DateOffset,
):
"""
Generate a range of dates with the spans between dates described by
the given `freq` DateOffset.
Generate a range of timestamps or timedeltas with the spans between points
described by the given `freq` DateOffset.

Parameters
----------
start : Timedelta or None
start : Timedelta, Timestamp or None
first point of produced date range
end : Timedelta or None
end : Timedelta, Timestamp or None
last point of produced date range
periods : int
number of periods in produced date range
Expand All @@ -69,14 +35,13 @@ def generate_timedeltas_range(

Returns
-------
ndarray[np.int64] representing nanosecond timedeltas
ndarray[np.int64]
Representing nanoseconds (unix timestamps or timedeltas, matching the input type).
"""
start_value = Timedelta(start).value if start is not None else None
end_value = Timedelta(end).value if end is not None else None
return _generate_regular_range(start_value, end_value, periods, freq.nanos)

start = start.value if start is not None else None
end = end.value if end is not None else None
stride = freq.nanos

def _generate_regular_range(start: int, end: int, periods: int, stride: int):
b = start
if periods is None:
# cannot just use e = Timestamp(end) + 1 because arange breaks when
Expand Down
31 changes: 13 additions & 18 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@

from pandas.core.algorithms import checked_add_with_arr
from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays._ranges import generate_timestamps_range
from pandas.core.arrays._ranges import generate_regular_range
import pandas.core.common as com

from pandas.tseries.frequencies import get_period_alias, to_offset
from pandas.tseries.offsets import Day, Tick
from pandas.tseries.offsets import Day, Tick, generate_range

_midnight = time(0, 0)

Expand Down Expand Up @@ -370,33 +370,22 @@ def _generate_range(
if end is not None:
end = Timestamp(end)

if start is None and end is None:
if closed is not None:
raise ValueError(
"Closed has to be None if not both of start and end are defined"
)
if start is NaT or end is NaT:
raise ValueError("Neither `start` nor `end` can be NaT")

left_closed, right_closed = dtl.validate_endpoints(closed)

start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize)

tz = _infer_tz_from_endpoints(start, end, tz)

if tz is not None:
# Localize the start and end arguments
start_tz = None if start is None else start.tz
end_tz = None if end is None else end.tz
start = _maybe_localize_point(
start,
getattr(start, "tz", None),
start,
freq,
tz,
ambiguous,
nonexistent,
start, start_tz, start, freq, tz, ambiguous, nonexistent
)
end = _maybe_localize_point(
end, getattr(end, "tz", None), end, freq, tz, ambiguous, nonexistent
end, end_tz, end, freq, tz, ambiguous, nonexistent
)
if freq is not None:
# We break Day arithmetic (fixed 24 hour) here and opt for
Expand All @@ -408,7 +397,13 @@ def _generate_range(
if end is not None:
end = end.tz_localize(None)

values, _tz = generate_timestamps_range(start, end, periods, freq)
if isinstance(freq, Tick):
values = generate_regular_range(start, end, periods, freq)
else:
xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
values = np.array([x.value for x in xdr], dtype=np.int64)

_tz = start.tz if start is not None else end.tz
index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz))

if tz is not None and index.tz is None:
Expand Down
10 changes: 2 additions & 8 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from pandas.core import nanops
from pandas.core.algorithms import checked_add_with_arr
from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays._ranges import generate_timedeltas_range
from pandas.core.arrays._ranges import generate_regular_range
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.ops.common import unpack_zerodim_and_defer
Expand Down Expand Up @@ -256,16 +256,10 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
if end is not None:
end = Timedelta(end)

if start is None and end is None:
if closed is not None:
raise ValueError(
"Closed has to be None if not both of start and end are defined"
)

left_closed, right_closed = dtl.validate_endpoints(closed)

if freq is not None:
index = generate_timedeltas_range(start, end, periods, freq)
index = generate_regular_range(start, end, periods, freq)
else:
index = np.linspace(start.value, end.value, periods).astype("i8")
if len(index) >= 2:
Expand Down