Skip to content

Commit 6e49c2a

Browse files
committed
Add time_unit argument to CFTimeIndex.to_datetimeindex
1 parent 1126c9b commit 6e49c2a

File tree

8 files changed

+106
-48
lines changed

8 files changed

+106
-48
lines changed

doc/whats-new.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,13 @@ New Features
5454
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_ and `Spencer Clark <https://github.com/spencerkclark>`_.
5555
- Improve the error message raised when no key is matching the available variables in a dataset. (:pull:`9943`)
5656
By `Jimmy Westling <https://github.com/illviljan>`_.
57+
- Added a ``time_unit`` argument to :py:meth:`CFTimeIndex.to_datetimeindex`.
58+
Note that in a future version of xarray,
59+
:py:meth:`CFTimeIndex.to_datetimeindex` will return a microsecond-resolution
60+
:py:class:`pandas.DatetimeIndex` instead of a nanosecond-resolution
61+
:py:class:`pandas.DatetimeIndex` (:pull:`9965`). By `Spencer Clark
62+
<https://github.com/spencerkclark>`_ and `Kai Mühlbauer
63+
<https://github.com/kmuehlbauer>`_.
5764

5865
Breaking changes
5966
~~~~~~~~~~~~~~~~

xarray/coding/cftimeindex.py

Lines changed: 50 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,8 @@
4242
from __future__ import annotations
4343

4444
import math
45-
import warnings
4645
from datetime import timedelta
47-
from typing import TYPE_CHECKING, Any
46+
from typing import TYPE_CHECKING, Any, Optional
4847

4948
import numpy as np
5049
import pandas as pd
@@ -58,7 +57,8 @@
5857
)
5958
from xarray.core.common import _contains_cftime_datetimes
6059
from xarray.core.options import OPTIONS
61-
from xarray.core.utils import attempt_import, is_scalar
60+
from xarray.core.types import PDDatetimeUnitOptions
61+
from xarray.core.utils import attempt_import, emit_user_level_warning, is_scalar
6262

6363
if TYPE_CHECKING:
6464
from xarray.coding.cftime_offsets import BaseCFTimeOffset
@@ -239,6 +239,8 @@ class CFTimeIndex(pd.Index):
239239
cftime_range
240240
"""
241241

242+
_data: np.ndarray
243+
242244
year = _field_accessor("year", "The year of the datetime")
243245
month = _field_accessor("month", "The month of the datetime")
244246
day = _field_accessor("day", "The days of the datetime")
@@ -544,14 +546,18 @@ def __rsub__(self, other):
544546
"that can be expressed at the nanosecond resolution."
545547
) from err
546548

547-
def to_datetimeindex(self, unsafe=False):
549+
def to_datetimeindex(
550+
self, unsafe: bool = False, time_unit: Optional[PDDatetimeUnitOptions] = None
551+
) -> pd.DatetimeIndex:
548552
"""If possible, convert this index to a pandas.DatetimeIndex.
549553
550554
Parameters
551555
----------
552556
unsafe : bool
553-
Flag to turn off warning when converting from a CFTimeIndex with
554-
a non-standard calendar to a DatetimeIndex (default ``False``).
557+
Flag to turn off calendar mismatch warnings (default ``False``).
558+
time_unit : str
559+
Time resolution of resulting DatetimeIndex. Can be one of ``"s"``,
560+
``"ms"``, ``"us"``, or ``"ns"`` (default ``"ns"``).
555561
556562
Returns
557563
-------
@@ -561,45 +567,68 @@ def to_datetimeindex(self, unsafe=False):
561567
------
562568
ValueError
563569
If the CFTimeIndex contains dates that are not possible in the
564-
standard calendar or outside the nanosecond-precision range.
570+
standard calendar or outside the range representable by the
571+
specified ``time_unit``.
565572
566573
Warns
567574
-----
568575
RuntimeWarning
569-
If converting from a non-standard calendar to a DatetimeIndex.
576+
If converting from a non-standard calendar, or a Gregorian
577+
calendar with dates prior to the reform (1582-10-15).
570578
571579
Warnings
572580
--------
573-
Note that for non-standard calendars, this will change the calendar
574-
type of the index. In that case the result of this method should be
575-
used with caution.
581+
Note that for non-proleptic Gregorian calendars, this will change the
582+
calendar type of the index. In that case the result of this method
583+
should be used with caution.
576584
577585
Examples
578586
--------
579587
>>> times = xr.cftime_range("2000", periods=2, calendar="gregorian")
580588
>>> times
581589
CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00],
582590
dtype='object', length=2, calendar='standard', freq=None)
583-
>>> times.to_datetimeindex()
584-
DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[us]', freq=None)
591+
>>> times.to_datetimeindex(time_unit="ns")
592+
DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None)
585593
"""
586594

587595
if not self._data.size:
588596
return pd.DatetimeIndex([])
589597

590-
# transform to us-resolution is needed for DatetimeIndex
591-
nptimes = cftime_to_nptime(self, time_unit="us")
598+
if time_unit is None:
599+
emit_user_level_warning(
600+
"In a future version of xarray to_datetimeindex will default "
601+
"to returning a 'us'-resolution DatetimeIndex instead of a "
602+
"'ns'-resolution DatetimeIndex. This warning can be silenced "
603+
"by explicitly setting the time_unit of the index returned.",
604+
FutureWarning,
605+
)
606+
time_unit = "ns"
607+
608+
nptimes = cftime_to_nptime(self, time_unit=time_unit)
592609
calendar = infer_calendar_name(self)
593610
if calendar not in _STANDARD_CALENDARS and not unsafe:
594-
warnings.warn(
611+
emit_user_level_warning(
595612
"Converting a CFTimeIndex with dates from a non-standard "
596-
f"calendar, {calendar!r}, to a pandas.DatetimeIndex, which uses dates "
597-
"from the standard calendar. This may lead to subtle errors "
598-
"in operations that depend on the length of time between "
599-
"dates.",
613+
f"calendar, {calendar!r}, to a pandas.DatetimeIndex, which "
614+
"uses dates from the standard calendar. This may lead to "
615+
"subtle errors in operations that depend on the length of "
616+
"time between dates.",
600617
RuntimeWarning,
601-
stacklevel=2,
602618
)
619+
if calendar == "standard" and not unsafe:
620+
reform_date = self.date_type(1582, 10, 15)
621+
if self.min() < reform_date:
622+
emit_user_level_warning(
623+
"Converting a CFTimeIndex with dates from a Gregorian "
624+
"calendar that fall before the reform date of 1582-10-15 "
625+
"to a pandas.DatetimeIndex. During this time period the "
626+
"Gregorian calendar and the proleptic Gregorian calendar "
627+
"of the DatetimeIndex do not exactly align. This warning "
628+
"can be silenced by setting unsafe=True.",
629+
RuntimeWarning,
630+
)
631+
603632
return pd.DatetimeIndex(nptimes)
604633

605634
def strftime(self, date_format):

xarray/tests/test_accessor_dt.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,9 @@ def test_cftime_strftime_access(data) -> None:
519519
date_format = "%Y%m%d%H"
520520
result = data.time.dt.strftime(date_format)
521521
datetime_array = xr.DataArray(
522-
xr.coding.cftimeindex.CFTimeIndex(data.time.values).to_datetimeindex(),
522+
xr.coding.cftimeindex.CFTimeIndex(data.time.values).to_datetimeindex(
523+
time_unit="ns"
524+
),
523525
name="stftime",
524526
coords=data.time.coords,
525527
dims=data.time.dims,

xarray/tests/test_cftime_offsets.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1672,7 +1672,7 @@ def test_new_to_legacy_freq_pd_freq_passthrough(freq, expected):
16721672
)
16731673
def test_cftime_range_same_as_pandas(start, end, freq):
16741674
result = date_range(start, end, freq=freq, calendar="standard", use_cftime=True)
1675-
result = result.to_datetimeindex()
1675+
result = result.to_datetimeindex(time_unit="ns")
16761676
expected = date_range(start, end, freq=freq, use_cftime=False)
16771677

16781678
np.testing.assert_array_equal(result, expected)
@@ -1694,8 +1694,8 @@ def test_cftime_range_no_freq(start, end, periods):
16941694
when freq is not provided, but start, end and periods are.
16951695
"""
16961696
# Generate date ranges using cftime_range
1697-
result = cftime_range(start=start, end=end, periods=periods)
1698-
result = result.to_datetimeindex()
1697+
cftimeindex = cftime_range(start=start, end=end, periods=periods)
1698+
result = cftimeindex.to_datetimeindex(time_unit="ns")
16991699
expected = pd.date_range(start=start, end=end, periods=periods)
17001700

17011701
np.testing.assert_array_equal(result, expected)

xarray/tests/test_cftimeindex.py

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,43 +1219,59 @@ def test_strftime_of_cftime_array(calendar):
12191219
@requires_cftime
12201220
@pytest.mark.parametrize("calendar", _ALL_CALENDARS)
12211221
@pytest.mark.parametrize("unsafe", [False, True])
1222-
def test_to_datetimeindex(calendar, unsafe):
1222+
def test_to_datetimeindex(calendar, unsafe) -> None:
12231223
index = xr.cftime_range("2000", periods=5, calendar=calendar)
1224-
expected = pd.date_range("2000", periods=5, unit="us")
1224+
expected = pd.date_range("2000", periods=5, unit="ns")
12251225

12261226
if calendar in _NON_STANDARD_CALENDARS and not unsafe:
12271227
with pytest.warns(RuntimeWarning, match="non-standard"):
1228-
result = index.to_datetimeindex()
1228+
result = index.to_datetimeindex(time_unit="ns")
12291229
else:
1230-
result = index.to_datetimeindex(unsafe=unsafe)
1230+
result = index.to_datetimeindex(unsafe=unsafe, time_unit="ns")
12311231

12321232
assert result.equals(expected)
12331233
np.testing.assert_array_equal(result, expected)
12341234
assert isinstance(result, pd.DatetimeIndex)
12351235

12361236

1237+
@requires_cftime
1238+
def test_to_datetimeindex_future_warning() -> None:
1239+
index = xr.cftime_range("2000", periods=5)
1240+
expected = pd.date_range("2000", periods=5, unit="ns")
1241+
with pytest.warns(FutureWarning, match="In a future version"):
1242+
result = index.to_datetimeindex()
1243+
assert result.equals(expected)
1244+
assert result.dtype == expected.dtype
1245+
1246+
12371247
@requires_cftime
12381248
@pytest.mark.parametrize("calendar", _ALL_CALENDARS)
1239-
def test_to_datetimeindex_out_of_range(calendar):
1249+
def test_to_datetimeindex_out_of_range(calendar) -> None:
12401250
index = xr.cftime_range("0001", periods=5, calendar=calendar)
1241-
# todo: suggestion from code review:
1242-
# - still warn when converting from a non-standard calendar
1243-
# to a proleptic Gregorian calendar
1244-
# - also warn when converting from a Gregorian calendar
1245-
# to a proleptic Gregorian calendar when dates fall before the reform
1246-
if calendar in _NON_STANDARD_CALENDARS:
1247-
with pytest.warns(RuntimeWarning, match="non-standard"):
1248-
index.to_datetimeindex()
1251+
with pytest.raises(ValueError, match="0001"):
1252+
index.to_datetimeindex(time_unit="ns")
1253+
1254+
1255+
@requires_cftime
1256+
@pytest.mark.parametrize("unsafe", [False, True])
1257+
def test_to_datetimeindex_gregorian_pre_reform(unsafe) -> None:
1258+
index = xr.cftime_range("1582", periods=5, calendar="gregorian")
1259+
if unsafe:
1260+
result = index.to_datetimeindex(time_unit="us", unsafe=unsafe)
12491261
else:
1250-
index.to_datetimeindex()
1262+
with pytest.warns(RuntimeWarning, match="reform"):
1263+
result = index.to_datetimeindex(time_unit="us", unsafe=unsafe)
1264+
expected = pd.date_range("1582", periods=5, unit="us")
1265+
assert result.equals(expected)
1266+
assert result.dtype == expected.dtype
12511267

12521268

12531269
@requires_cftime
12541270
@pytest.mark.parametrize("calendar", ["all_leap", "360_day"])
1255-
def test_to_datetimeindex_feb_29(calendar):
1271+
def test_to_datetimeindex_feb_29(calendar) -> None:
12561272
index = xr.cftime_range("2001-02-28", periods=2, calendar=calendar)
12571273
with pytest.raises(ValueError, match="29"):
1258-
index.to_datetimeindex()
1274+
index.to_datetimeindex(time_unit="ns")
12591275

12601276

12611277
@pytest.mark.xfail(reason="fails on pandas main branch")
@@ -1271,10 +1287,10 @@ def test_multiindex():
12711287
@pytest.mark.parametrize("method", ["floor", "ceil", "round"])
12721288
def test_rounding_methods_against_datetimeindex(freq, method):
12731289
# compare at unit="ns", matching the time_unit requested from to_datetimeindex
1274-
expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777s", unit="us")
1290+
expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777s", unit="ns")
12751291
expected = getattr(expected, method)(freq)
12761292
result = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777s")
1277-
result = getattr(result, method)(freq).to_datetimeindex()
1293+
result = getattr(result, method)(freq).to_datetimeindex(time_unit="ns")
12781294
assert result.equals(expected)
12791295

12801296

xarray/tests/test_cftimeindex_resample.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ def compare_against_pandas(
9797
).mean()
9898
# TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass
9999
result_cftimeindex["time"] = (
100-
result_cftimeindex.xindexes["time"].to_pandas_index().to_datetimeindex()
100+
result_cftimeindex.xindexes["time"]
101+
.to_pandas_index()
102+
.to_datetimeindex(time_unit="ns")
101103
)
102104
xr.testing.assert_identical(result_cftimeindex, result_datetimeindex)
103105

@@ -181,7 +183,7 @@ def test_calendars(calendar: str) -> None:
181183
# TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass
182184
new_pd_index = da_cftime.xindexes["time"].to_pandas_index()
183185
assert isinstance(new_pd_index, CFTimeIndex) # shouldn't that be a pd.Index?
184-
da_cftime["time"] = new_pd_index.to_datetimeindex()
186+
da_cftime["time"] = new_pd_index.to_datetimeindex(time_unit="ns")
185187
xr.testing.assert_identical(da_cftime, da_datetime)
186188

187189

xarray/tests/test_groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1868,7 +1868,7 @@ def test_resample(
18681868
def resample_as_pandas(array, *args, **kwargs):
18691869
array_ = array.copy(deep=True)
18701870
if use_cftime:
1871-
array_["time"] = times.to_datetimeindex()
1871+
array_["time"] = times.to_datetimeindex(time_unit="ns")
18721872
result = DataArray.from_series(
18731873
array_.to_series().resample(*args, **kwargs).mean()
18741874
)
@@ -2321,7 +2321,7 @@ def test_resample(
23212321
def resample_as_pandas(ds, *args, **kwargs):
23222322
ds_ = ds.copy(deep=True)
23232323
if use_cftime:
2324-
ds_["time"] = times.to_datetimeindex()
2324+
ds_["time"] = times.to_datetimeindex(time_unit="ns")
23252325
result = Dataset.from_dataframe(
23262326
ds_.to_dataframe().resample(*args, **kwargs).mean()
23272327
)

xarray/tests/test_missing.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,9 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar):
609609
def test_get_clean_interp_index_dt(cf_da, calendar, freq):
610610
"""In the gregorian case, the index should be proportional to normal datetimes."""
611611
g = cf_da(calendar, freq=freq)
612-
g["stime"] = xr.Variable(data=g.time.to_index().to_datetimeindex(), dims=("time",))
612+
g["stime"] = xr.Variable(
613+
data=g.time.to_index().to_datetimeindex(time_unit="ns"), dims=("time",)
614+
)
613615

614616
gi = get_clean_interp_index(g, "time")
615617
si = get_clean_interp_index(g, "time", use_coordinate="stime")

0 commit comments

Comments
 (0)