Skip to content

Commit 1083c9d

Browse files
spencerkclark, pre-commit-ci[bot], dcherian
authored
Enable origin and offset arguments in resample (#7284)
* Initial work toward enabling origin and offset arguments in resample
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* Fix _convert_offset_to_timedelta
* Reduce number of tests
* Address initial review comments
* Add more typing information
* Make cftime import lazy
* Fix module_available import and test
* Remove old origin argument
* Add type annotations for resample_cftime.py
* Add None as a possibility for closed and label
* Add what's new entry
* Add missing type annotation
* Delete added line
* Fix typing errors
* Add comment and test for as_timedelta stub
* Remove old code
* [test-upstream]
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
1 parent 78b27ec commit 1083c9d

File tree

10 files changed

+456
-107
lines changed

10 files changed

+456
-107
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ v2022.11.1 (unreleased)
2121

2222
New Features
2323
~~~~~~~~~~~~
24-
24+
- Enable using ``offset`` and ``origin`` arguments in :py:meth:`DataArray.resample`
25+
and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`7284`). By `Spencer
26+
Clark <https://github.com/spencerkclark>`_.
2527
- Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`).
2628
By `Gregory Lee <https://github.com/grlee77>`_ and `Joe Hamman <https://github.com/jhamman>`_.
2729

xarray/coding/cftime_offsets.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ def __mul__(self, other):
207207
return new_self * other
208208
return type(self)(n=other * self.n)
209209

210+
def as_timedelta(self):
211+
"""All Tick subclasses must implement an as_timedelta method."""
212+
raise NotImplementedError
213+
210214

211215
def _get_day_of_month(other, day_option):
212216
"""Find the day in `other`'s month that satisfies a BaseCFTimeOffset's

xarray/core/common.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,13 @@
4444
from .indexes import Index
4545
from .resample import Resample
4646
from .rolling_exp import RollingExp
47-
from .types import DTypeLikeSave, ScalarOrArray, SideOptions, T_DataWithCoords
47+
from .types import (
48+
DatetimeLike,
49+
DTypeLikeSave,
50+
ScalarOrArray,
51+
SideOptions,
52+
T_DataWithCoords,
53+
)
4854
from .variable import Variable
4955

5056
DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]]
@@ -817,7 +823,9 @@ def _resample(
817823
skipna: bool | None,
818824
closed: SideOptions | None,
819825
label: SideOptions | None,
820-
base: int,
826+
base: int | None,
827+
offset: pd.Timedelta | datetime.timedelta | str | None,
828+
origin: str | DatetimeLike,
821829
keep_attrs: bool | None,
822830
loffset: datetime.timedelta | str | None,
823831
restore_coord_dims: bool | None,
@@ -845,6 +853,18 @@ def _resample(
845853
For frequencies that evenly subdivide 1 day, the "origin" of the
846854
aggregated intervals. For example, for "24H" frequency, base could
847855
range from 0 through 23.
856+
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
857+
The datetime on which to adjust the grouping. The timezone of origin
858+
must match the timezone of the index.
859+
860+
If a datetime is not used, these values are also supported:
861+
- 'epoch': `origin` is 1970-01-01
862+
- 'start': `origin` is the first value of the timeseries
863+
- 'start_day': `origin` is the first day at midnight of the timeseries
864+
- 'end': `origin` is the last value of the timeseries
865+
- 'end_day': `origin` is the ceiling midnight of the last day
866+
offset : pd.Timedelta, datetime.timedelta, or str, default is None
867+
An offset timedelta added to the origin.
848868
loffset : timedelta or str, optional
849869
Offset used to adjust the resampled time labels. Some pandas date
850870
offset strings are supported.
@@ -960,10 +980,24 @@ def _resample(
960980
if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex):
961981
from .resample_cftime import CFTimeGrouper
962982

963-
grouper = CFTimeGrouper(freq, closed, label, base, loffset)
983+
grouper = CFTimeGrouper(
984+
freq=freq,
985+
closed=closed,
986+
label=label,
987+
base=base,
988+
loffset=loffset,
989+
origin=origin,
990+
offset=offset,
991+
)
964992
else:
965993
grouper = pd.Grouper(
966-
freq=freq, closed=closed, label=label, base=base, loffset=loffset
994+
freq=freq,
995+
closed=closed,
996+
label=label,
997+
base=base,
998+
offset=offset,
999+
origin=origin,
1000+
loffset=loffset,
9671001
)
9681002
group = DataArray(
9691003
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM

xarray/core/dataarray.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
from .rolling import DataArrayCoarsen, DataArrayRolling
7979
from .types import (
8080
CoarsenBoundaryOptions,
81+
DatetimeLike,
8182
DatetimeUnitOptions,
8283
Dims,
8384
ErrorOptions,
@@ -6531,7 +6532,9 @@ def resample(
65316532
skipna: bool | None = None,
65326533
closed: SideOptions | None = None,
65336534
label: SideOptions | None = None,
6534-
base: int = 0,
6535+
base: int | None = None,
6536+
offset: pd.Timedelta | datetime.timedelta | str | None = None,
6537+
origin: str | DatetimeLike = "start_day",
65356538
keep_attrs: bool | None = None,
65366539
loffset: datetime.timedelta | str | None = None,
65376540
restore_coord_dims: bool | None = None,
@@ -6555,10 +6558,22 @@ def resample(
65556558
Side of each interval to treat as closed.
65566559
label : {"left", "right"}, optional
65576560
Side of each interval to use for labeling.
6558-
base : int, default = 0
6561+
base : int, optional
65596562
For frequencies that evenly subdivide 1 day, the "origin" of the
65606563
aggregated intervals. For example, for "24H" frequency, base could
65616564
range from 0 through 23.
6565+
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
6566+
The datetime on which to adjust the grouping. The timezone of origin
6567+
must match the timezone of the index.
6568+
6569+
If a datetime is not used, these values are also supported:
6570+
- 'epoch': `origin` is 1970-01-01
6571+
- 'start': `origin` is the first value of the timeseries
6572+
- 'start_day': `origin` is the first day at midnight of the timeseries
6573+
- 'end': `origin` is the last value of the timeseries
6574+
- 'end_day': `origin` is the ceiling midnight of the last day
6575+
offset : pd.Timedelta, datetime.timedelta, or str, default is None
6576+
An offset timedelta added to the origin.
65626577
loffset : timedelta or str, optional
65636578
Offset used to adjust the resampled time labels. Some pandas date
65646579
offset strings are supported.
@@ -6640,6 +6655,8 @@ def resample(
66406655
closed=closed,
66416656
label=label,
66426657
base=base,
6658+
offset=offset,
6659+
origin=origin,
66436660
keep_attrs=keep_attrs,
66446661
loffset=loffset,
66456662
restore_coord_dims=restore_coord_dims,

xarray/core/dataset.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
CoarsenBoundaryOptions,
108108
CombineAttrsOptions,
109109
CompatOptions,
110+
DatetimeLike,
110111
DatetimeUnitOptions,
111112
Dims,
112113
ErrorOptions,
@@ -9128,7 +9129,9 @@ def resample(
91289129
skipna: bool | None = None,
91299130
closed: SideOptions | None = None,
91309131
label: SideOptions | None = None,
9131-
base: int = 0,
9132+
base: int | None = None,
9133+
offset: pd.Timedelta | datetime.timedelta | str | None = None,
9134+
origin: str | DatetimeLike = "start_day",
91329135
keep_attrs: bool | None = None,
91339136
loffset: datetime.timedelta | str | None = None,
91349137
restore_coord_dims: bool | None = None,
@@ -9152,10 +9155,22 @@ def resample(
91529155
Side of each interval to treat as closed.
91539156
label : {"left", "right"}, optional
91549157
Side of each interval to use for labeling.
9155-
base : int, default = 0
9158+
base : int, optional
91569159
For frequencies that evenly subdivide 1 day, the "origin" of the
91579160
aggregated intervals. For example, for "24H" frequency, base could
91589161
range from 0 through 23.
9162+
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
9163+
The datetime on which to adjust the grouping. The timezone of origin
9164+
must match the timezone of the index.
9165+
9166+
If a datetime is not used, these values are also supported:
9167+
- 'epoch': `origin` is 1970-01-01
9168+
- 'start': `origin` is the first value of the timeseries
9169+
- 'start_day': `origin` is the first day at midnight of the timeseries
9170+
- 'end': `origin` is the last value of the timeseries
9171+
- 'end_day': `origin` is the ceiling midnight of the last day
9172+
offset : pd.Timedelta, datetime.timedelta, or str, default is None
9173+
An offset timedelta added to the origin.
91599174
loffset : timedelta or str, optional
91609175
Offset used to adjust the resampled time labels. Some pandas date
91619176
offset strings are supported.
@@ -9190,6 +9205,8 @@ def resample(
91909205
closed=closed,
91919206
label=label,
91929207
base=base,
9208+
offset=offset,
9209+
origin=origin,
91939210
keep_attrs=keep_attrs,
91949211
loffset=loffset,
91959212
restore_coord_dims=restore_coord_dims,

0 commit comments

Comments
 (0)