Skip to content

Commit

Permalink
Improve to_datetime bounds checking (#50183)
Browse files Browse the repository at this point in the history
* add test for float to_datetime near overflow bounds

* fix float to_datetime near overflow bounds

* fix typo and formatting

* fix formatting

* fix test to not fail on rounding differences

* don't use approximate comparison on datetimes, it doesn't work

* also can't convert datetime to float

* match dtypes

* TST: don't try to use non-integer years (see #50301)

* TST: don't cross an integer

(tsmax_in_days happens to be close to an integer,
and this is a test of rounding)

* PERF: remove unnecessary copy

* add whatsnew
  • Loading branch information
rebecca-palmer authored Jan 20, 2023
1 parent 99f98de commit 0b04174
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 9 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,7 @@ Datetimelike
- Bug in :func:`to_datetime` was failing to parse date strings ``'today'`` and ``'now'`` if ``format`` was not ISO8601 (:issue:`50359`)
- Bug in :func:`Timestamp.utctimetuple` raising a ``TypeError`` (:issue:`32174`)
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing mixed-offset :class:`Timestamp` with ``errors='ignore'`` (:issue:`50585`)
- Bug in :func:`to_datetime` was incorrectly handling floating-point inputs within 1 ``unit`` of the overflow boundaries (:issue:`50183`)

Timedelta
^^^^^^^^^
Expand Down
12 changes: 3 additions & 9 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,11 +514,9 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
elif arg.dtype.kind == "f":
mult, _ = precision_from_unit(unit)

iresult = arg.astype("i8")
mask = np.isnan(arg) | (arg == iNaT)
iresult[mask] = 0

fvalues = iresult.astype("f8") * mult
fvalues = (arg * mult).astype("f8", copy=False)
fvalues[mask] = 0

if (fvalues < Timestamp.min.value).any() or (
fvalues > Timestamp.max.value
Expand All @@ -528,11 +526,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
return _to_datetime_with_unit(arg, unit, name, utc, errors)
raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")

# TODO: is fresult meaningfully different from fvalues?
fresult = (arg * mult).astype("f8")
fresult[mask] = 0

arr = fresult.astype("M8[ns]", copy=False)
arr = fvalues.astype("M8[ns]", copy=False)
arr[mask] = np.datetime64("NaT", "ns")

tz_parsed = None
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1935,6 +1935,27 @@ def test_to_timestamp_unit_coerce(self, bad_val):
result = to_datetime([1, 2, bad_val], unit="D", errors="coerce")
tm.assert_index_equal(result, expected)

def test_float_to_datetime_raise_near_bounds(self):
    """Check float inputs just inside / just outside the datetime bounds."""
    # GH50183
    ns_per_day = 1e9 * 60 * 60 * 24
    max_days = 2**63 / ns_per_day  # 2**63 ns, expressed in days
    margin = 0.005

    # Just in bounds: conversion must succeed regardless of the error mode.
    in_bounds = Series([0, max_days - margin, -max_days + margin], dtype=float)
    expected = (in_bounds * ns_per_day).astype(np.int64)
    for errors in ("raise", "coerce", "ignore"):
        converted = to_datetime(in_bounds, unit="D", errors=errors)
        # Compare as integers with a relative tolerance so that float
        # rounding differences do not fail the test.
        tm.assert_almost_equal(converted.astype(np.int64), expected, rtol=1e-10)

    # Just out of bounds (upper side, then lower side): must raise.
    msg = "cannot convert input with unit 'D'"
    too_high = Series([0, max_days + margin], dtype=float)
    too_low = Series([0, -max_days - margin], dtype=float)
    for overflowing in (too_high, too_low):
        with pytest.raises(OutOfBoundsDatetime, match=msg):
            to_datetime(overflowing, unit="D", errors="raise")


class TestToDatetimeDataFrame:
@pytest.fixture
Expand Down

0 comments on commit 0b04174

Please sign in to comment.