Skip to content

Commit b295193

Browse files
committed
Backcompat
1 parent c8c27f7 commit b295193

File tree

6 files changed

+62
-12
lines changed

6 files changed

+62
-12
lines changed

doc/whats-new.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ New Features
3434
By `Tom Nicholas <https://github.com/TomNicholas>`_.
3535
- Added zarr backends for :py:func:`open_groups` (:issue:`9430`, :pull:`9469`).
3636
By `Eni Awowale <https://github.com/eni-awowale>`_.
37-
- Support lazy grouping by dask arrays, and allow specifying groups with ``UniqueGrouper(labels=["a", "b", "c"])``
37+
- Support lazy grouping by dask arrays, and allow specifying ordered groups with ``UniqueGrouper(labels=["a", "b", "c"])``
3838
(:issue:`2852`, :issue:`757`).
3939
By `Deepak Cherian <https://github.com/dcherian>`_.
4040
- Added support for vectorized interpolation using additional interpolators
@@ -49,6 +49,11 @@ Breaking changes
4949

5050
Deprecations
5151
~~~~~~~~~~~~
52+
- Grouping by a chunked array (e.g. dask or cubed) currently eagerly loads that variable into
53+
memory. This behaviour is deprecated. If eager loading was intended, please load such arrays
54+
manually using ``.load()`` or ``.compute()``. Otherwise, pass ``eagerly_compute_group=False`` and
55+
provide expected group labels using the ``labels`` kwarg to a grouper object such as
56+
:py:class:`grouper.UniqueGrouper` or :py:class:`grouper.BinGrouper`.
5257

5358

5459
Bug fixes

xarray/core/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1094,7 +1094,7 @@ def _resample(
10941094
f"Received {type(freq)} instead."
10951095
)
10961096

1097-
rgrouper = ResolvedGrouper(grouper, group, self)
1097+
rgrouper = ResolvedGrouper(grouper, group, self, eagerly_compute_group=False)
10981098

10991099
return resample_cls(
11001100
self,

xarray/core/dataarray.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6733,6 +6733,7 @@ def groupby(
67336733
*,
67346734
squeeze: Literal[False] = False,
67356735
restore_coord_dims: bool = False,
6736+
eagerly_compute_group: bool = True,
67366737
**groupers: Grouper,
67376738
) -> DataArrayGroupBy:
67386739
"""Returns a DataArrayGroupBy object for performing grouped operations.
@@ -6862,7 +6863,9 @@ def groupby(
68626863
)
68636864

68646865
_validate_groupby_squeeze(squeeze)
6865-
rgroupers = _parse_group_and_groupers(self, group, groupers)
6866+
rgroupers = _parse_group_and_groupers(
6867+
self, group, groupers, eagerly_compute_group=eagerly_compute_group
6868+
)
68666869
return DataArrayGroupBy(self, rgroupers, restore_coord_dims=restore_coord_dims)
68676870

68686871
@_deprecate_positional_args("v2024.07.0")
@@ -6877,6 +6880,7 @@ def groupby_bins(
68776880
squeeze: Literal[False] = False,
68786881
restore_coord_dims: bool = False,
68796882
duplicates: Literal["raise", "drop"] = "raise",
6883+
eagerly_compute_group: bool = True,
68806884
) -> DataArrayGroupBy:
68816885
"""Returns a DataArrayGroupBy object for performing grouped operations.
68826886
@@ -6950,7 +6954,9 @@ def groupby_bins(
69506954
precision=precision,
69516955
include_lowest=include_lowest,
69526956
)
6953-
rgrouper = ResolvedGrouper(grouper, group, self)
6957+
rgrouper = ResolvedGrouper(
6958+
grouper, group, self, eagerly_compute_group=eagerly_compute_group
6959+
)
69546960

69556961
return DataArrayGroupBy(
69566962
self,

xarray/core/dataset.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10366,6 +10366,7 @@ def groupby(
1036610366
*,
1036710367
squeeze: Literal[False] = False,
1036810368
restore_coord_dims: bool = False,
10369+
eagerly_compute_group: bool = True,
1036910370
**groupers: Grouper,
1037010371
) -> DatasetGroupBy:
1037110372
"""Returns a DatasetGroupBy object for performing grouped operations.
@@ -10463,7 +10464,9 @@ def groupby(
1046310464
)
1046410465

1046510466
_validate_groupby_squeeze(squeeze)
10466-
rgroupers = _parse_group_and_groupers(self, group, groupers)
10467+
rgroupers = _parse_group_and_groupers(
10468+
self, group, groupers, eagerly_compute_group=eagerly_compute_group
10469+
)
1046710470

1046810471
return DatasetGroupBy(self, rgroupers, restore_coord_dims=restore_coord_dims)
1046910472

@@ -10479,6 +10482,7 @@ def groupby_bins(
1047910482
squeeze: Literal[False] = False,
1048010483
restore_coord_dims: bool = False,
1048110484
duplicates: Literal["raise", "drop"] = "raise",
10485+
eagerly_compute_group: bool = True,
1048210486
) -> DatasetGroupBy:
1048310487
"""Returns a DatasetGroupBy object for performing grouped operations.
1048410488
@@ -10552,7 +10556,9 @@ def groupby_bins(
1055210556
precision=precision,
1055310557
include_lowest=include_lowest,
1055410558
)
10555-
rgrouper = ResolvedGrouper(grouper, group, self)
10559+
rgrouper = ResolvedGrouper(
10560+
grouper, group, self, eagerly_compute_group=eagerly_compute_group
10561+
)
1055610562

1055710563
return DatasetGroupBy(
1055810564
self,

xarray/core/groupby.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
FrozenMappingWarningOnValuesAccess,
4242
contains_only_chunked_or_numpy,
4343
either_dict_or_kwargs,
44+
emit_user_level_warning,
4445
hashable,
4546
is_scalar,
4647
maybe_wrap_array,
@@ -284,6 +285,7 @@ class ResolvedGrouper(Generic[T_DataWithCoords]):
284285
grouper: Grouper
285286
group: T_Group
286287
obj: T_DataWithCoords
288+
eagerly_compute_group: bool = field(repr=False)
287289

288290
# returned by factorize:
289291
encoded: EncodedGroups = field(init=False, repr=False)
@@ -310,6 +312,18 @@ def __post_init__(self) -> None:
310312

311313
self.group = _resolve_group(self.obj, self.group)
312314

315+
if (
316+
self.eagerly_compute_group
317+
and not isinstance(self.group, _DummyGroup)
318+
and is_chunked_array(self.group.variable._data)
319+
):
320+
emit_user_level_warning(
321+
f"Eagerly computing the DataArray you're grouping by ({self.group.name!r}) "
322+
"is deprecated and will be removed in v2025.05.0. "
323+
"Please load this array's data manually using `.compute` or `.load`.",
324+
DeprecationWarning,
325+
)
326+
313327
self.encoded = self.grouper.factorize(self.group)
314328

315329
@property
@@ -330,7 +344,11 @@ def __len__(self) -> int:
330344

331345

332346
def _parse_group_and_groupers(
333-
obj: T_Xarray, group: GroupInput, groupers: dict[str, Grouper]
347+
obj: T_Xarray,
348+
group: GroupInput,
349+
groupers: dict[str, Grouper],
350+
*,
351+
eagerly_compute_group: bool,
334352
) -> tuple[ResolvedGrouper, ...]:
335353
from xarray.core.dataarray import DataArray
336354
from xarray.core.variable import Variable
@@ -355,7 +373,11 @@ def _parse_group_and_groupers(
355373

356374
rgroupers: tuple[ResolvedGrouper, ...]
357375
if isinstance(group, DataArray | Variable):
358-
rgroupers = (ResolvedGrouper(UniqueGrouper(), group, obj),)
376+
rgroupers = (
377+
ResolvedGrouper(
378+
UniqueGrouper(), group, obj, eagerly_compute_group=eagerly_compute_group
379+
),
380+
)
359381
else:
360382
if group is not None:
361383
if TYPE_CHECKING:
@@ -368,7 +390,9 @@ def _parse_group_and_groupers(
368390
grouper_mapping = cast("Mapping[Hashable, Grouper]", groupers)
369391

370392
rgroupers = tuple(
371-
ResolvedGrouper(grouper, group, obj)
393+
ResolvedGrouper(
394+
grouper, group, obj, eagerly_compute_group=eagerly_compute_group
395+
)
372396
for group, grouper in grouper_mapping.items()
373397
)
374398
return rgroupers

xarray/tests/test_groupby.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2843,7 +2843,10 @@ def test_multiple_groupers(use_flox) -> None:
28432843
if has_dask:
28442844
b["xy"] = b["xy"].chunk()
28452845
with raise_if_dask_computes():
2846-
gb = b.groupby(x=UniqueGrouper(), xy=UniqueGrouper(labels=["a", "b", "c"]))
2846+
with pytest.warns(DeprecationWarning):
2847+
gb = b.groupby(
2848+
x=UniqueGrouper(), xy=UniqueGrouper(labels=["a", "b", "c"])
2849+
)
28472850

28482851
expected = xr.DataArray(
28492852
[[[1, 1, 1], [0, 1, 2]]] * 4,
@@ -2994,7 +2997,11 @@ def test_lazy_grouping(grouper, expect_index):
29942997
pd.testing.assert_index_equal(encoded.full_index, expect_index)
29952998
np.testing.assert_array_equal(encoded.unique_coord.values, np.array(expect_index))
29962999

2997-
lazy = xr.Dataset({"foo": data}, coords={"zoo": data}).groupby(zoo=grouper).count()
3000+
lazy = (
3001+
xr.Dataset({"foo": data}, coords={"zoo": data})
3002+
.groupby(zoo=grouper, eagerly_compute_group=False)
3003+
.count()
3004+
)
29983005
eager = (
29993006
xr.Dataset({"foo": data}, coords={"zoo": data.compute()})
30003007
.groupby(zoo=grouper)
@@ -3019,7 +3026,9 @@ def test_lazy_grouping_errors():
30193026
coords={"y": ("x", dask.array.arange(20, chunks=3))},
30203027
)
30213028

3022-
gb = data.groupby(y=UniqueGrouper(labels=np.arange(5, 10)))
3029+
gb = data.groupby(
3030+
y=UniqueGrouper(labels=np.arange(5, 10)), eagerly_compute_group=False
3031+
)
30233032
message = "not supported when lazily grouping by"
30243033
with pytest.raises(ValueError, match=message):
30253034
gb.map(lambda x: x)

0 commit comments

Comments
 (0)