Skip to content

Commit

Permalink
better backcompat
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian committed Oct 29, 2024
1 parent b295193 commit f826b65
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 12 deletions.
10 changes: 10 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -6749,6 +6749,11 @@ def groupby(
restore_coord_dims : bool, default: False
If True, also restore the dimension order of multi-dimensional
coordinates.
eagerly_compute_group: bool
Whether to eagerly compute ``group`` when it is a chunked array.
This option is to maintain backwards compatibility. Set to False
to opt-in to future behaviour, where ``group`` is not automatically loaded
into memory.
**groupers : Mapping of str to Grouper or Resampler
Mapping of variable name to group by to :py:class:`Grouper` or :py:class:`Resampler` object.
One of ``group`` or ``groupers`` must be provided.
Expand Down Expand Up @@ -6917,6 +6922,11 @@ def groupby_bins(
coordinates.
duplicates : {"raise", "drop"}, default: "raise"
If bin edges are not unique, raise ValueError or drop non-uniques.
eagerly_compute_group: bool
Whether to eagerly compute ``group`` when it is a chunked array.
This option is to maintain backwards compatibility. Set to False
to opt-in to future behaviour, where ``group`` is not automatically loaded
into memory.
Returns
-------
Expand Down
10 changes: 10 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10382,6 +10382,11 @@ def groupby(
restore_coord_dims : bool, default: False
If True, also restore the dimension order of multi-dimensional
coordinates.
eagerly_compute_group: bool
Whether to eagerly compute ``group`` when it is a chunked array.
This option is to maintain backwards compatibility. Set to False
to opt-in to future behaviour, where ``group`` is not automatically loaded
into memory.
**groupers : Mapping of str to Grouper or Resampler
Mapping of variable name to group by to :py:class:`Grouper` or :py:class:`Resampler` object.
One of ``group`` or ``groupers`` must be provided.
Expand Down Expand Up @@ -10519,6 +10524,11 @@ def groupby_bins(
coordinates.
duplicates : {"raise", "drop"}, default: "raise"
If bin edges are not unique, raise ValueError or drop non-uniques.
eagerly_compute_group: bool
Whether to eagerly compute ``group`` when it is a chunked array.
This option is to maintain backwards compatibility. Set to False
to opt-in to future behaviour, where ``group`` is not automatically loaded
into memory.
Returns
-------
Expand Down
49 changes: 37 additions & 12 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,21 +308,45 @@ def __post_init__(self) -> None:
# of pd.cut
# We do not want to modify the original object, since the same grouper
# might be used multiple times.
from xarray.groupers import BinGrouper, UniqueGrouper

self.grouper = copy.deepcopy(self.grouper)

self.group = _resolve_group(self.obj, self.group)

if (
self.eagerly_compute_group
and not isinstance(self.group, _DummyGroup)
and is_chunked_array(self.group.variable._data)
if not isinstance(self.group, _DummyGroup) and is_chunked_array(
self.group.variable._data
):
emit_user_level_warning(
f"Eagerly computing the DataArray you're grouping by ({self.group.name!r}) "
"is deprecated and will be removed in v2025.05.0. "
"Please load this array's data manually using `.compute` or `.load`.",
DeprecationWarning,
)
if self.eagerly_compute_group is False:
# This requires a pass to discover the groups present
if (
isinstance(self.grouper, UniqueGrouper)
and self.grouper.labels is None
):
raise ValueError(
"Please pass `labels` to UniqueGrouper when grouping by a chunked array."
)
# this requires a pass to compute the bin edges
if isinstance(self.grouper, BinGrouper) and isinstance(
self.grouper.bins, int
):
raise ValueError(
"Please pass explicit bin edges to BinGrouper using the ``bins`` kwarg"
"when grouping by a chunked array."
)

if self.eagerly_compute_group:
emit_user_level_warning(
f""""Eagerly computing the DataArray you're grouping by ({self.group.name!r}) "
is deprecated and will raise an error in v2025.05.0.
Please load this array's data manually using `.compute` or `.load`.
To intentionally avoid eager loading, either (1) specify
`.groupby({self.group.name}=UniqueGrouper(labels=...), eagerly_load_group=False)`
or (2) pass explicit bin edges using or `.groupby({self.group.name}=BinGrouper(bins=...),
eagerly_load_group=False)`; as appropriate.""",
DeprecationWarning,
)
self.group = self.group.compute()

self.encoded = self.grouper.factorize(self.group)

Expand Down Expand Up @@ -678,8 +702,9 @@ def _raise_if_by_is_chunked(self):
if self._by_chunked:
raise ValueError(
"This method is not supported when lazily grouping by a chunked array. "
"Either load the array in to memory prior to grouping, or explore another "
"way of applying your function, potentially using the `flox` package."
"Either load the array in to memory prior to grouping using .load or .compute, "
" or explore another way of applying your function, "
"potentially using the `flox` package."
)

def _raise_if_not_single_group(self):
Expand Down
35 changes: 35 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3091,6 +3091,41 @@ def test_groupby_multiple_bin_grouper_missing_groups():
assert_identical(actual, expected)


@requires_dask
def test_groupby_dask_eager_load_warnings():
ds = xr.Dataset(
{"foo": (("z"), np.arange(12))},
coords={"x": ("z", np.arange(12)), "y": ("z", np.arange(12))},
).chunk(z=6)

with pytest.warns(DeprecationWarning):
ds.groupby(x=UniqueGrouper())

with pytest.warns(DeprecationWarning):
ds.groupby("x")

with pytest.warns(DeprecationWarning):
ds.groupby(ds.x)

with pytest.raises(ValueError, match="Please pass"):
ds.groupby("x", eagerly_compute_group=False)

# This is technically fine but anyone iterating over the groupby object
# will see an error, so let's warn and have them opt-in.
with pytest.warns(DeprecationWarning):
ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]))

ds.groupby(x=UniqueGrouper(labels=[1, 2, 3]), eagerly_compute_group=False)

with pytest.warns(DeprecationWarning):
ds.groupby_bins("x", bins=3)
with pytest.raises(ValueError, match="Please pass"):
ds.groupby_bins("x", bins=3, eagerly_compute_group=False)
with pytest.warns(DeprecationWarning):
ds.groupby_bins("x", bins=[1, 2, 3])
ds.groupby_bins("x", bins=[1, 2, 3], eagerly_compute_group=False)


# Possible property tests
# 1. lambda x: x
# 2. grouped-reduce on unique coords is identical to array
Expand Down

0 comments on commit f826b65

Please sign in to comment.