Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support additional dtypes in resample #9413

Merged
merged 18 commits into from
Sep 7, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ Bug fixes
- Fix deprecation warning that was raised when calling ``np.array`` on an ``xr.DataArray``
in NumPy 2.0 (:issue:`9312`, :pull:`9393`)
By `Andrew Scherer <https://github.com/andrew-s28>`_.
- Fix support for using ``pandas.BaseOffset``, ``pandas.Timedelta``, and
dcherian marked this conversation as resolved.
Show resolved Hide resolved
``datetime.timedelta`` objects as ``resample`` frequencies
(:issue:`9408`, :pull:`9413`).
By `Oliver Higgs <https://github.com/oliverhiggs>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
37 changes: 36 additions & 1 deletion xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,11 +772,18 @@ def _emit_freq_deprecation_warning(deprecated_freq):
emit_user_level_warning(message, FutureWarning)


def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffset:
def to_offset(
freq: BaseCFTimeOffset | str | timedelta | pd.Timedelta | pd.DateOffset,
warn: bool = True,
) -> BaseCFTimeOffset:
"""Convert a frequency string to the appropriate subclass of
BaseCFTimeOffset."""
if isinstance(freq, BaseCFTimeOffset):
return freq
if isinstance(freq, timedelta | pd.Timedelta):
return delta_to_tick(freq)
if isinstance(freq, pd.DateOffset):
freq = freq.freqstr
dcherian marked this conversation as resolved.
Show resolved Hide resolved

match = re.match(_PATTERN, freq)
if match is None:
Expand All @@ -791,6 +798,34 @@ def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffs
return _FREQUENCIES[freq](n=multiples)


def delta_to_tick(delta: timedelta | pd.Timedelta) -> Tick:
    """Convert a timedelta into an equivalent ``Tick`` offset.

    Adapted from pandas.tslib.delta_to_tick.

    Parameters
    ----------
    delta : datetime.timedelta or pandas.Timedelta
        Duration to convert.

    Returns
    -------
    Tick
        A ``Day``, ``Hour``, ``Minute``, ``Second``, ``Millisecond`` or
        ``Microsecond`` offset whose ``n`` covers the whole duration.

    Raises
    ------
    ValueError
        If ``delta`` is a ``pandas.Timedelta`` with a non-zero nanosecond
        component.
    """
    if isinstance(delta, pd.Timedelta) and delta.nanoseconds != 0:
        # pandas.Timedelta has nanoseconds, but these are not supported
        raise ValueError(
            "Unable to convert 'pandas.Timedelta' object with non-zero "
            "nanoseconds to 'CFTimeOffset' object"
        )

    # ``days`` may be negative while ``seconds`` and ``microseconds`` are
    # always non-negative, so this sum is exact for negative durations too.
    total_seconds = delta.days * 86400 + delta.seconds

    if delta.microseconds == 0:
        if delta.seconds == 0:
            return Day(n=delta.days)
        if total_seconds % 3600 == 0:
            return Hour(n=total_seconds // 3600)
        if total_seconds % 60 == 0:
            return Minute(n=total_seconds // 60)
        return Second(n=total_seconds)

    # Sub-second precision: the result is a Millisecond or Microsecond
    # offset. The whole-second part must be folded into the count, otherwise
    # e.g. 1.5 s would be reported as 500 ms.
    total_microseconds = total_seconds * 1_000_000 + delta.microseconds
    if total_microseconds % 1_000 == 0:
        return Millisecond(n=total_microseconds // 1_000)
    return Microsecond(n=total_microseconds)


def to_cftime_datetime(date_str_or_date, calendar=None):
if cftime is None:
raise ModuleNotFoundError("No module named 'cftime'")
Expand Down
16 changes: 10 additions & 6 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import datetime
import warnings
from collections.abc import Callable, Hashable, Iterable, Iterator, Mapping
from contextlib import suppress
Expand All @@ -13,6 +14,7 @@
from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops
from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed
from xarray.core.options import OPTIONS, _get_keep_attrs
from xarray.core.types import ResampleCompatible
from xarray.core.utils import (
Frozen,
either_dict_or_kwargs,
Expand All @@ -32,8 +34,6 @@


if TYPE_CHECKING:
import datetime

from numpy.typing import DTypeLike

from xarray.core.dataarray import DataArray
Expand Down Expand Up @@ -891,14 +891,14 @@ def rolling_exp(
def _resample(
self,
resample_cls: type[T_Resample],
indexer: Mapping[Hashable, str | Resampler] | None,
indexer: Mapping[Hashable, ResampleCompatible | Resampler] | None,
skipna: bool | None,
closed: SideOptions | None,
label: SideOptions | None,
offset: pd.Timedelta | datetime.timedelta | str | None,
origin: str | DatetimeLike,
restore_coord_dims: bool | None,
**indexer_kwargs: str | Resampler,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> T_Resample:
"""Returns a Resample object for performing resampling operations.

Expand Down Expand Up @@ -1078,14 +1078,18 @@ def _resample(
)

grouper: Resampler
if isinstance(freq, str):
if isinstance(freq, ResampleCompatible):
grouper = TimeResampler(
freq=freq, closed=closed, label=label, origin=origin, offset=offset
)
elif isinstance(freq, Resampler):
grouper = freq
else:
raise ValueError("freq must be a str or a Resampler object")
raise ValueError(
"freq must be an object of type 'str', 'datetime.timedelta', "
"'pandas.Timedelta', 'pandas.DateOffset', or 'TimeResampler'. "
f"Received {type(freq)} instead."
)

rgrouper = ResolvedGrouper(grouper, group, self)

Expand Down
9 changes: 5 additions & 4 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
QueryEngineOptions,
QueryParserOptions,
ReindexMethodOptions,
ResampleCompatible,
Self,
SideOptions,
T_ChunkDimFreq,
Expand Down Expand Up @@ -7244,15 +7245,15 @@ def coarsen(
@_deprecate_positional_args("v2024.07.0")
def resample(
self,
indexer: Mapping[Hashable, str | Resampler] | None = None,
indexer: Mapping[Hashable, ResampleCompatible | Resampler] | None = None,
*,
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
restore_coord_dims: bool | None = None,
**indexer_kwargs: str | Resampler,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> DataArrayResample:
"""Returns a Resample object for performing resampling operations.

Expand All @@ -7263,7 +7264,7 @@ def resample(

Parameters
----------
indexer : Mapping of Hashable to str, optional
indexer : Mapping of Hashable to str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler, optional
Mapping from the dimension name to resample frequency [1]_. The
dimension must be datetime-like.
skipna : bool, optional
Expand All @@ -7287,7 +7288,7 @@ def resample(
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
**indexer_kwargs : str
**indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.

Expand Down
9 changes: 5 additions & 4 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@
QueryEngineOptions,
QueryParserOptions,
ReindexMethodOptions,
ResampleCompatible,
SideOptions,
T_ChunkDimFreq,
T_DatasetPadConstantValues,
Expand Down Expand Up @@ -10685,15 +10686,15 @@ def coarsen(
@_deprecate_positional_args("v2024.07.0")
def resample(
self,
indexer: Mapping[Any, str | Resampler] | None = None,
indexer: Mapping[Any, ResampleCompatible | Resampler] | None = None,
*,
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
restore_coord_dims: bool | None = None,
**indexer_kwargs: str | Resampler,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> DatasetResample:
"""Returns a Resample object for performing resampling operations.

Expand All @@ -10704,7 +10705,7 @@ def resample(

Parameters
----------
indexer : Mapping of Hashable to str, optional
indexer : Mapping of Hashable to str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler, optional
Mapping from the dimension name to resample frequency [1]_. The
dimension must be datetime-like.
skipna : bool, optional
Expand All @@ -10728,7 +10729,7 @@ def resample(
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
**indexer_kwargs : str
**indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.

Expand Down
4 changes: 2 additions & 2 deletions xarray/core/resample_cftime.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
from xarray.core.types import SideOptions

if typing.TYPE_CHECKING:
from xarray.core.types import CFTimeDatetime
from xarray.core.types import CFTimeDatetime, ResampleCompatible


class CFTimeGrouper:
Expand All @@ -75,7 +75,7 @@ class CFTimeGrouper:

def __init__(
self,
freq: str | BaseCFTimeOffset,
freq: ResampleCompatible | BaseCFTimeOffset,
closed: SideOptions | None = None,
label: SideOptions | None = None,
origin: str | CFTimeDatetime = "start_day",
Expand Down
2 changes: 2 additions & 0 deletions xarray/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,3 +309,5 @@ def copy(
Bins = Union[
int, Sequence[int], Sequence[float], Sequence[pd.Timestamp], np.ndarray, pd.Index
]

ResampleCompatible: TypeAlias = str | datetime.timedelta | pd.Timedelta | pd.DateOffset
20 changes: 16 additions & 4 deletions xarray/groupers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@
import numpy as np
import pandas as pd

from xarray.coding.cftime_offsets import _new_to_legacy_freq
from xarray.coding.cftime_offsets import BaseCFTimeOffset, _new_to_legacy_freq
from xarray.core import duck_array_ops
from xarray.core.coordinates import Coordinates
from xarray.core.dataarray import DataArray
from xarray.core.groupby import T_Group, _DummyGroup
from xarray.core.indexes import safe_cast_to_index
from xarray.core.resample_cftime import CFTimeGrouper
from xarray.core.types import Bins, DatetimeLike, GroupIndices, SideOptions
from xarray.core.types import (
Bins,
DatetimeLike,
GroupIndices,
ResampleCompatible,
SideOptions,
)
from xarray.core.variable import Variable

__all__ = [
Expand Down Expand Up @@ -336,7 +342,7 @@ class TimeResampler(Resampler):

Attributes
----------
freq : str
freq : str, datetime.timedelta, pandas.Timedelta, or pandas.DateOffset
Frequency to resample to. See `Pandas frequency
aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
for a list of possible values.
Expand All @@ -358,7 +364,7 @@ class TimeResampler(Resampler):
An offset timedelta added to the origin.
"""

freq: str
freq: ResampleCompatible
closed: SideOptions | None = field(default=None)
label: SideOptions | None = field(default=None)
origin: str | DatetimeLike = field(default="start_day")
Expand Down Expand Up @@ -388,6 +394,12 @@ def _init_properties(self, group: T_Group) -> None:
offset=offset,
)
else:
if isinstance(self.freq, BaseCFTimeOffset):
raise ValueError(
"'BaseCFTimeOffset' resample frequencies are only supported "
"when resampling a 'CFTimeIndex'"
)

self.index_grouper = pd.Grouper(
# TODO remove once requiring pandas >= 2.2
freq=_new_to_legacy_freq(self.freq),
Expand Down
49 changes: 48 additions & 1 deletion xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import datetime
import operator
import warnings
from unittest import mock
Expand Down Expand Up @@ -757,7 +758,6 @@ def test_groupby_none_group_name() -> None:


def test_groupby_getitem(dataset) -> None:

assert_identical(dataset.sel(x=["a"]), dataset.groupby("x")["a"])
assert_identical(dataset.sel(z=[1]), dataset.groupby("z")[1])
assert_identical(dataset.foo.sel(x=["a"]), dataset.foo.groupby("x")["a"])
Expand Down Expand Up @@ -1813,6 +1813,30 @@ def resample_as_pandas(array, *args, **kwargs):
with pytest.raises(ValueError):
reverse.resample(time="1D").mean()

@pytest.mark.parametrize("use_cftime", [True, False])
def test_resample_dtype(self, use_cftime: bool) -> None:
    """Smoke test: ``DataArray.resample`` accepts each supported freq type."""
    if use_cftime and not has_cftime:
        pytest.skip()

    time_index = xr.date_range(
        "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime
    )
    array = DataArray(np.arange(10), [("time", time_index)])

    # One representative per accepted type: str, pandas.Timedelta,
    # pandas offset object and datetime.timedelta.
    for freq in (
        "10min",
        pd.Timedelta(hours=2),
        pd.offsets.MonthBegin(),
        datetime.timedelta(days=1, hours=6),
    ):
        array.resample(time=freq)
dcherian marked this conversation as resolved.
Show resolved Hide resolved

@pytest.mark.parametrize("use_cftime", [True, False])
def test_resample_doctest(self, use_cftime: bool) -> None:
# run the doctest example here so we are not surprised
Expand Down Expand Up @@ -2232,6 +2256,29 @@ def test_resample_and_first(self) -> None:
result = actual.reduce(method)
assert_equal(expected, result)

@pytest.mark.parametrize("use_cftime", [True, False])
def test_resample_dtype(self, use_cftime: bool) -> None:
    """Smoke test: ``Dataset.resample`` accepts each supported freq type."""
    if use_cftime and not has_cftime:
        pytest.skip()

    times = xr.date_range(
        "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime
    )
    # Random draws are made in the same order as the variables are listed.
    data_vars = {
        "foo": (["time", "x", "y"], np.random.randn(10, 5, 3)),
        "bar": ("time", np.random.randn(10), {"meta": "data"}),
        "time": times,
    }
    ds = Dataset(data_vars)

    # One representative per accepted type: str, pandas.Timedelta,
    # pandas offset object and datetime.timedelta.
    for freq in (
        "10min",
        pd.Timedelta(hours=2),
        pd.offsets.MonthBegin(),
        datetime.timedelta(days=1, hours=6),
    ):
        ds.resample(time=freq)

def test_resample_min_count(self) -> None:
times = pd.date_range("2000-01-01", freq="6h", periods=10)
ds = Dataset(
Expand Down
Loading