diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e54819e31c90f..c583651cf4f12 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -900,6 +900,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`) - Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`) - Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) +- Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be thrown even if ``errors=coerce`` if there were more than 50 rows (:issue:`45319`) - Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`) - diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 388f751d9cc57..203a5711b7a59 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -228,7 +228,11 @@ def _maybe_cache( unique_dates = unique(arg) if len(unique_dates) < len(arg): cache_dates = convert_listlike(unique_dates, format) - cache_array = Series(cache_dates, index=unique_dates) + # GH#45319 + try: + cache_array = Series(cache_dates, index=unique_dates) + except OutOfBoundsDatetime: + return cache_array # GH#39882 and GH#35888 in case of None and NaT we get duplicates if not cache_array.index.is_unique: cache_array = cache_array[~cache_array.index.duplicated()] diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b128838318ac0..aa5c3324fe0f0 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2777,3 +2777,34 @@ def test_to_datetime_monotonic_increasing_index(cache): result = to_datetime(times.iloc[:, 0], cache=cache) expected = times.iloc[:, 0] tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "series_length", + [40, start_caching_at, (start_caching_at + 1), (start_caching_at + 5)], +) +def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length): + # GH#45319 + s = Series( + [datetime.fromisoformat("1446-04-12 00:00:00+00:00")] + + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length) + ) + result1 = to_datetime(s, errors="coerce", utc=True) + + expected1 = Series( + [NaT] + ([Timestamp("1991-10-20 00:00:00+00:00")] * series_length) + ) + + tm.assert_series_equal(result1, expected1) + + result2 = to_datetime(s, errors="ignore", utc=True) + + expected2 = Series( + [datetime.fromisoformat("1446-04-12 00:00:00+00:00")] + + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length) + ) + + tm.assert_series_equal(result2, expected2) + + with pytest.raises(OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"): + to_datetime(s, errors="raise", utc=True)