Skip to content

Commit 2c272e2

Browse files
BUG: to_datetime incorrect OverflowError (#50533)
* BUG: to_datetime incorrect OverflowError
* Update doc/source/whatsnew/v2.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
1 parent 41ab44a commit 2c272e2

File tree

4 files changed

+40
-16
lines changed

4 files changed

+40
-16
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,7 @@ Categorical
800800
Datetimelike
801801
^^^^^^^^^^^^
802802
- Bug in :func:`pandas.infer_freq`, raising ``TypeError`` when inferred on :class:`RangeIndex` (:issue:`47084`)
803+
- Bug in :func:`to_datetime` incorrectly raising ``OverflowError`` with string arguments corresponding to large integers (:issue:`50533`)
803804
- Bug in :func:`to_datetime` was raising on invalid offsets with ``errors='coerce'`` and ``infer_datetime_format=True`` (:issue:`48633`)
804805
- Bug in :class:`DatetimeIndex` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``dtype`` or data (:issue:`48659`)
805806
- Bug in subtracting a ``datetime`` scalar from :class:`DatetimeIndex` failing to retain the original ``freq`` attribute (:issue:`48818`)

pandas/_libs/tslibs/parsing.pyx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ from pandas._libs.tslibs.nattype cimport (
5353
c_NaT as NaT,
5454
c_nat_strings as nat_strings,
5555
)
56+
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
5657
from pandas._libs.tslibs.np_datetime cimport (
5758
NPY_DATETIMEUNIT,
5859
npy_datetimestruct,
@@ -298,6 +299,12 @@ def parse_datetime_string(
298299
# following may be raised from dateutil
299300
# TypeError: 'NoneType' object is not iterable
300301
raise ValueError(f'Given date string "{date_string}" not likely a datetime')
302+
except OverflowError as err:
303+
# with e.g. "08335394550" dateutil raises when trying to pass
304+
# year=8335394550 to datetime.replace
305+
raise OutOfBoundsDatetime(
306+
f'Parsing "{date_string}" to datetime overflows'
307+
) from err
301308

302309
return dt
303310

pandas/core/arrays/datetimes.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,7 @@
5050
TimeNonexistent,
5151
npt,
5252
)
53-
from pandas.errors import (
54-
OutOfBoundsDatetime,
55-
PerformanceWarning,
56-
)
53+
from pandas.errors import PerformanceWarning
5754
from pandas.util._exceptions import find_stack_level
5855
from pandas.util._validators import validate_inclusive
5956

@@ -2154,18 +2151,14 @@ def objects_to_datetime64ns(
21542151

21552152
flags = data.flags
21562153
order: Literal["F", "C"] = "F" if flags.f_contiguous else "C"
2157-
try:
2158-
result, tz_parsed = tslib.array_to_datetime(
2159-
data.ravel("K"),
2160-
errors=errors,
2161-
utc=utc,
2162-
dayfirst=dayfirst,
2163-
yearfirst=yearfirst,
2164-
)
2165-
result = result.reshape(data.shape, order=order)
2166-
except OverflowError as err:
2167-
# Exception is raised when a part of date is greater than 32 bit signed int
2168-
raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err
2154+
result, tz_parsed = tslib.array_to_datetime(
2155+
data.ravel("K"),
2156+
errors=errors,
2157+
utc=utc,
2158+
dayfirst=dayfirst,
2159+
yearfirst=yearfirst,
2160+
)
2161+
result = result.reshape(data.shape, order=order)
21692162

21702163
if tz_parsed is not None:
21712164
# We can take a shortcut since the datetime64 numpy array

pandas/tests/tools/test_to_datetime.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,29 @@ def test_to_datetime_parse_timezone_keeps_name(self):
542542

543543

544544
class TestToDatetime:
545+
@pytest.mark.filterwarnings("ignore:Could not infer format")
546+
def test_to_datetime_overflow(self):
547+
# we should get an OutOfBoundsDatetime, NOT OverflowError
548+
# TODO: Timestamp raises ValueError("could not convert string to Timestamp")
549+
# can we make these more consistent?
550+
arg = "08335394550"
551+
msg = 'Parsing "08335394550" to datetime overflows, at position 0'
552+
with pytest.raises(OutOfBoundsDatetime, match=msg):
553+
to_datetime(arg)
554+
555+
with pytest.raises(OutOfBoundsDatetime, match=msg):
556+
to_datetime([arg])
557+
558+
res = to_datetime(arg, errors="coerce")
559+
assert res is NaT
560+
res = to_datetime([arg], errors="coerce")
561+
tm.assert_index_equal(res, Index([NaT]))
562+
563+
res = to_datetime(arg, errors="ignore")
564+
assert isinstance(res, str) and res == arg
565+
res = to_datetime([arg], errors="ignore")
566+
tm.assert_index_equal(res, Index([arg], dtype=object))
567+
545568
def test_to_datetime_mixed_datetime_and_string(self):
546569
# GH#47018 adapted old doctest with new behavior
547570
d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1)))

0 commit comments

Comments
 (0)