Skip to content

Commit a91d268

Browse files
mathausemax-sixtyspencerkclark
authored
unify freq strings (independent of pd version) (#8627)
* unify freq strings (independent of pd version) * Update xarray/tests/test_cftime_offsets.py Co-authored-by: Spencer Clark <spencerkclark@gmail.com> * update code and tests * make mypy happy * add 'YE' to _ANNUAL_OFFSET_TYPES * un x-fail test * adapt more freq strings * simplify test * also translate 'h', 'min', 's' * add comment * simplify test * add freqs, invert ifs; add try block * properly invert if condition * fix more tests * fix comment * whats new * test pd freq strings are passed through --------- Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: Spencer Clark <spencerkclark@gmail.com>
1 parent fffb03c commit a91d268

17 files changed

+313
-113
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ New Features
4242
Breaking changes
4343
~~~~~~~~~~~~~~~~
4444

45+
- :py:func:`infer_freq` always returns the frequency strings as defined in pandas 2.2
46+
(:issue:`8612`, :pull:`8627`). By `Mathias Hauser <https://github.com/mathause>`_.
4547

4648
Deprecations
4749
~~~~~~~~~~~~

xarray/coding/cftime_offsets.py

Lines changed: 96 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,7 @@ def _emit_freq_deprecation_warning(deprecated_freq):
751751
emit_user_level_warning(message, FutureWarning)
752752

753753

754-
def to_offset(freq):
754+
def to_offset(freq, warn=True):
755755
"""Convert a frequency string to the appropriate subclass of
756756
BaseCFTimeOffset."""
757757
if isinstance(freq, BaseCFTimeOffset):
@@ -763,7 +763,7 @@ def to_offset(freq):
763763
raise ValueError("Invalid frequency string provided")
764764

765765
freq = freq_data["freq"]
766-
if freq in _DEPRECATED_FREQUENICES:
766+
if warn and freq in _DEPRECATED_FREQUENICES:
767767
_emit_freq_deprecation_warning(freq)
768768
multiples = freq_data["multiple"]
769769
multiples = 1 if multiples is None else int(multiples)
@@ -1229,7 +1229,8 @@ def date_range(
12291229
start=start,
12301230
end=end,
12311231
periods=periods,
1232-
freq=freq,
1232+
# TODO remove translation once requiring pandas >= 2.2
1233+
freq=_new_to_legacy_freq(freq),
12331234
tz=tz,
12341235
normalize=normalize,
12351236
name=name,
@@ -1257,6 +1258,96 @@ def date_range(
12571258
)
12581259

12591260

1261+
def _new_to_legacy_freq(freq):
    """Translate a pandas >= 2.2 style frequency string to its legacy spelling.

    xarray always reports "ME", "QE", "YS" and "YE" for month-end,
    quarter-end, year-start and year-end frequencies, but pandas versions
    older than 2.2 do not accept these strings. Until xarray requires
    pandas >= 2.2, such strings are mapped back to the deprecated aliases
    ("M", "Q", "AS", "A") before being handed to pandas.

    ``freq`` is returned unchanged when it is falsy, when the installed
    pandas is at least 2.2, or when xarray's ``to_offset`` rejects it with a
    ``ValueError`` (the string may be valid in pandas but not in xarray).

    NOTE: other conversions ("h" -> "H", ..., "ns" -> "N") are not required.
    """
    # TODO: remove once requiring pandas >= 2.2
    if not freq or Version(pd.__version__) >= Version("2.2"):
        return freq

    try:
        offset = to_offset(freq)
    except ValueError:
        # freq may be valid in pandas but not in xarray
        return freq

    if isinstance(offset, MonthEnd) and "ME" in freq:
        return freq.replace("ME", "M")
    if isinstance(offset, QuarterEnd) and "QE" in freq:
        return freq.replace("QE", "Q")
    if isinstance(offset, YearBegin) and "YS" in freq:
        return freq.replace("YS", "AS")
    if not isinstance(offset, YearEnd):
        return freq

    # Year-end frequencies. Testing for "Y" is required as this was valid in
    # xarray 2023.11 - 2024.01.
    if "Y-" in freq:
        # Replace "Y-" instead of just "Y" to prevent corrupting anchored
        # offsets that contain "Y" in the month abbreviation,
        # e.g. "Y-MAY" -> "A-MAY".
        return freq.replace("Y-", "A-")
    if "YE-" in freq:
        return freq.replace("YE-", "A-")
    if "A-" not in freq and freq.endswith("Y"):
        return freq.replace("Y", "A")
    if freq.endswith("YE"):
        return freq.replace("YE", "A")
    return freq
1301+
1302+
1303+
def _legacy_to_new_freq(freq):
    """Translate a legacy (pandas < 2.2) frequency string to the new spelling.

    Used to avoid internal deprecation warnings when ``freq`` was determined
    by a pandas version older than 2.2: "M", "Q", "AS", "A"/"Y" become "ME",
    "QE", "YS", "YE", and the sub-daily aliases "H", "T", "S", "L", "U"
    become "h", "min", "s", "ms", "us".

    ``freq`` is returned unchanged when it is falsy, when the installed
    pandas is at least 2.2, or when xarray's ``to_offset`` rejects it with a
    ``ValueError`` (the string may be valid in pandas but not in xarray).
    """
    # TODO: remove once requiring pandas >= 2.2
    if not freq or Version(pd.__version__) >= Version("2.2"):
        return freq

    try:
        # warn=False: the legacy alias came from pandas, not from the user
        offset = to_offset(freq, warn=False)
    except ValueError:
        # freq may be valid in pandas but not in xarray
        return freq

    if isinstance(offset, MonthEnd) and "ME" not in freq:
        return freq.replace("M", "ME")
    if isinstance(offset, QuarterEnd) and "QE" not in freq:
        return freq.replace("Q", "QE")
    if isinstance(offset, YearBegin) and "YS" not in freq:
        return freq.replace("AS", "YS")

    if isinstance(offset, YearEnd):
        if "A-" in freq:
            # Replace "A-" instead of just "A" to prevent corrupting
            # anchored offsets that contain "A" in the month abbreviation,
            # e.g. "A-MAY" -> "YE-MAY".
            return freq.replace("A-", "YE-")
        if "Y-" in freq:
            return freq.replace("Y-", "YE-")
        if freq.endswith("A"):
            # the "A-MAY" case is already handled above
            return freq.replace("A", "YE")
        if "YE" not in freq and freq.endswith("Y"):
            # the "Y-MAY" case is already handled above
            return freq.replace("Y", "YE")
        return freq

    # Sub-daily offsets: (offset class, legacy alias, new alias), checked in
    # the same order as the original elif chain.
    tick_aliases = (
        (Hour, "H", "h"),
        (Minute, "T", "min"),
        (Second, "S", "s"),
        (Millisecond, "L", "ms"),
        (Microsecond, "U", "us"),
    )
    for offset_type, legacy_alias, new_alias in tick_aliases:
        if isinstance(offset, offset_type):
            return freq.replace(legacy_alias, new_alias)

    return freq
1349+
1350+
12601351
def date_range_like(source, calendar, use_cftime=None):
12611352
"""Generate a datetime array with the same frequency, start and end as
12621353
another one, but in a different calendar.
@@ -1301,21 +1392,8 @@ def date_range_like(source, calendar, use_cftime=None):
13011392
"`date_range_like` was unable to generate a range as the source frequency was not inferable."
13021393
)
13031394

1304-
# xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd
1305-
# frequencies, but older versions of pandas do not support these as
1306-
# frequency strings. Until xarray's minimum pandas version is 2.2 or above,
1307-
# we add logic to continue using the deprecated "M" and "Q" frequency
1308-
# strings in these circumstances.
1309-
if Version(pd.__version__) < Version("2.2"):
1310-
freq_as_offset = to_offset(freq)
1311-
if isinstance(freq_as_offset, MonthEnd) and "ME" in freq:
1312-
freq = freq.replace("ME", "M")
1313-
elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq:
1314-
freq = freq.replace("QE", "Q")
1315-
elif isinstance(freq_as_offset, YearBegin) and "YS" in freq:
1316-
freq = freq.replace("YS", "AS")
1317-
elif isinstance(freq_as_offset, YearEnd) and "YE" in freq:
1318-
freq = freq.replace("YE", "A")
1395+
# TODO remove once requiring pandas >= 2.2
1396+
freq = _legacy_to_new_freq(freq)
13191397

13201398
use_cftime = _should_cftime_be_used(source, calendar, use_cftime)
13211399

xarray/coding/frequencies.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
import numpy as np
4646
import pandas as pd
4747

48-
from xarray.coding.cftime_offsets import _MONTH_ABBREVIATIONS
48+
from xarray.coding.cftime_offsets import _MONTH_ABBREVIATIONS, _legacy_to_new_freq
4949
from xarray.coding.cftimeindex import CFTimeIndex
5050
from xarray.core.common import _contains_datetime_like_objects
5151

@@ -99,7 +99,7 @@ def infer_freq(index):
9999
inferer = _CFTimeFrequencyInferer(index)
100100
return inferer.get_freq()
101101

102-
return pd.infer_freq(index)
102+
return _legacy_to_new_freq(pd.infer_freq(index))
103103

104104

105105
class _CFTimeFrequencyInferer: # (pd.tseries.frequencies._FrequencyInferer):

xarray/core/groupby.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import pandas as pd
1212
from packaging.version import Version
1313

14+
from xarray.coding.cftime_offsets import _new_to_legacy_freq
1415
from xarray.core import dtypes, duck_array_ops, nputils, ops
1516
from xarray.core._aggregations import (
1617
DataArrayGroupByAggregations,
@@ -529,7 +530,8 @@ def __post_init__(self) -> None:
529530
)
530531
else:
531532
index_grouper = pd.Grouper(
532-
freq=grouper.freq,
533+
# TODO remove once requiring pandas >= 2.2
534+
freq=_new_to_legacy_freq(grouper.freq),
533535
closed=grouper.closed,
534536
label=grouper.label,
535537
origin=grouper.origin,

xarray/core/pdcompat.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def _convert_base_to_offset(base, freq, index):
8383
from xarray.coding.cftimeindex import CFTimeIndex
8484

8585
if isinstance(index, pd.DatetimeIndex):
86+
freq = cftime_offsets._new_to_legacy_freq(freq)
8687
freq = pd.tseries.frequencies.to_offset(freq)
8788
if isinstance(freq, pd.offsets.Tick):
8889
return pd.Timedelta(base * freq.nanos // freq.n)

xarray/tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ def _importorskip(
109109
has_pint, requires_pint = _importorskip("pint")
110110
has_numexpr, requires_numexpr = _importorskip("numexpr")
111111
has_flox, requires_flox = _importorskip("flox")
112+
has_pandas_ge_2_2, __ = _importorskip("pandas", "2.2")
112113

113114

114115
# some special cases

xarray/tests/test_accessor_dt.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,9 @@ def test_dask_accessor_method(self, method, parameters) -> None:
248248
assert_equal(actual.compute(), expected.compute())
249249

250250
def test_seasons(self) -> None:
251-
dates = pd.date_range(start="2000/01/01", freq="M", periods=12)
251+
dates = xr.date_range(
252+
start="2000/01/01", freq="ME", periods=12, use_cftime=False
253+
)
252254
dates = dates.append(pd.Index([np.datetime64("NaT")]))
253255
dates = xr.DataArray(dates)
254256
seasons = xr.DataArray(

xarray/tests/test_calendar_ops.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
from __future__ import annotations
22

33
import numpy as np
4-
import pandas as pd
54
import pytest
6-
from packaging.version import Version
75

86
from xarray import DataArray, infer_freq
97
from xarray.coding.calendar_ops import convert_calendar, interp_calendar
@@ -89,17 +87,17 @@ def test_convert_calendar_360_days(source, target, freq, align_on):
8987

9088
if align_on == "date":
9189
np.testing.assert_array_equal(
92-
conv.time.resample(time="M").last().dt.day,
90+
conv.time.resample(time="ME").last().dt.day,
9391
[30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30],
9492
)
9593
elif target == "360_day":
9694
np.testing.assert_array_equal(
97-
conv.time.resample(time="M").last().dt.day,
95+
conv.time.resample(time="ME").last().dt.day,
9896
[30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29],
9997
)
10098
else:
10199
np.testing.assert_array_equal(
102-
conv.time.resample(time="M").last().dt.day,
100+
conv.time.resample(time="ME").last().dt.day,
103101
[30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31],
104102
)
105103
if source == "360_day" and align_on == "year":
@@ -135,13 +133,7 @@ def test_convert_calendar_missing(source, target, freq):
135133
)
136134
out = convert_calendar(da_src, target, missing=np.nan, align_on="date")
137135

138-
if Version(pd.__version__) < Version("2.2"):
139-
if freq == "4h" and target == "proleptic_gregorian":
140-
expected_freq = "4H"
141-
else:
142-
expected_freq = freq
143-
else:
144-
expected_freq = freq
136+
expected_freq = freq
145137
assert infer_freq(out.time) == expected_freq
146138

147139
expected = date_range(

0 commit comments

Comments
 (0)