Skip to content

Commit

Permalink
Fix pandas datetime decoding with NumPy >= 2.0 for small integer dtypes (#9518)
Browse files Browse the repository at this point in the history

* Fix pandas datetime decoding with np.int32 values and NumPy >= 2

Thanks @langmore for noting this issue and suggesting this workaround.

* Refine what's new entry
  • Loading branch information
spencerkclark authored Sep 19, 2024
1 parent e313853 commit 17571b5
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 8 deletions.
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ Bug fixes
- Make path-like variable names illegal when constructing a DataTree from a Dataset
(:issue:`9339`, :pull:`9378`)
By `Etienne Schalk <https://github.com/etienneschalk>`_.
- Work around `upstream pandas issue
<https://github.com/pandas-dev/pandas/issues/56996>`_ to ensure that we can
decode times encoded with small integer dtype values (e.g. ``np.int32``) in
environments with NumPy 2.0 or greater without needing to fall back to cftime
(:pull:`9518`). By `Spencer Clark <https://github.com/spencerkclark>`_.
- Fix bug when encoding times with missing values as floats in the case when
the non-missing times could in theory be encoded with integers
(:issue:`9488`, :pull:`9497`). By `Spencer Clark
Expand Down
9 changes: 9 additions & 0 deletions xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,15 @@ def _decode_datetime_with_pandas(
"pandas."
)

# Work around pandas.to_timedelta issue with dtypes smaller than int64 and
# NumPy 2.0 by casting all int and uint data to int64 and uint64,
# respectively. See https://github.com/pandas-dev/pandas/issues/56996 for
# more details.
if flat_num_dates.dtype.kind == "i":
flat_num_dates = flat_num_dates.astype(np.int64)
elif flat_num_dates.dtype.kind == "u":
flat_num_dates = flat_num_dates.astype(np.uint64)

time_units, ref_date_str = _unpack_netcdf_time_units(units)
time_units = _netcdf_to_numpy_timeunit(time_units)
try:
Expand Down
27 changes: 19 additions & 8 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import pandas as pd
import pytest
from pandas.errors import OutOfBoundsDatetime
from pandas.errors import OutOfBoundsDatetime, OutOfBoundsTimedelta

from xarray import (
DataArray,
Expand Down Expand Up @@ -1136,11 +1136,16 @@ def test_should_cftime_be_used_target_not_npable():
_should_cftime_be_used(src, "noleap", False)


@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64])
def test_decode_cf_datetime_uint(dtype):
@pytest.mark.parametrize(
"dtype",
[np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64],
)
def test_decode_cf_datetime_varied_integer_dtypes(dtype):
units = "seconds since 2018-08-22T03:23:03Z"
num_dates = dtype(50)
result = decode_cf_datetime(num_dates, units)
# Set use_cftime=False to ensure we cannot mask a failure by falling back
# to cftime.
result = decode_cf_datetime(num_dates, units, use_cftime=False)
expected = np.asarray(np.datetime64("2018-08-22T03:23:53", "ns"))
np.testing.assert_equal(result, expected)

Expand All @@ -1154,6 +1159,14 @@ def test_decode_cf_datetime_uint64_with_cftime():
np.testing.assert_equal(result, expected)


def test_decode_cf_datetime_uint64_with_pandas_overflow_error():
    """Check that an oversized uint64 nanosecond offset raises with pandas.

    The offset is decoded with ``use_cftime=False`` and the call is expected
    to raise ``OutOfBoundsTimedelta``.
    """
    # 1_000_000 * 86_400 * 360 * 500_000 == 15_552_000_000_000_000_000, which
    # fits in uint64 but is larger than the int64 maximum (~9.22e18).
    oversized_offset = np.uint64(1_000_000 * 86_400 * 360 * 500_000)
    with pytest.raises(OutOfBoundsTimedelta):
        decode_cf_datetime(
            oversized_offset,
            "nanoseconds since 1970-01-01",
            "standard",
            use_cftime=False,
        )


@requires_cftime
def test_decode_cf_datetime_uint64_with_cftime_overflow_error():
units = "microseconds since 1700-01-01"
Expand Down Expand Up @@ -1438,10 +1451,8 @@ def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None
"days since 1700-01-01",
np.dtype("int32"),
),
"mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": (
"250YS",
"days since 1700-01-01",
np.dtype("int32"),
"mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": pytest.param(
"250YS", "days since 1700-01-01", np.dtype("int32"), marks=requires_cftime
),
"pandas-encoding-with-default-units-and-dtype": ("250YS", None, None),
}
Expand Down

0 comments on commit 17571b5

Please sign in to comment.