Skip to content

Commit 638b251

Browse files
authored
Future warning for default reduction dimension of groupby (#2366)
* warn the default reduction dimension of groupby * Only use DEFAULT_DIMS in groupby/resample * Restore unintended line break * Lint * Add whatsnew * Support dataset.groupby * Add a version in the warning message. * is -> == * Update tests * Update docs for DatasetResample.reduce * Match dataset.resample behavior to the current one. * Update via comments.
1 parent 78058e2 commit 638b251

File tree

10 files changed

+141
-29
lines changed

10 files changed

+141
-29
lines changed

doc/whats-new.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,16 @@ What's New
3030
v0.11.0 (unreleased)
3131
--------------------
3232

33+
Breaking changes
34+
~~~~~~~~~~~~~~~~
35+
36+
- Reduction of :py:meth:`DataArray.groupby` and :py:meth:`DataArray.resample`
37+
without dimension argument will change in the next release.
38+
Now we warn a FutureWarning.
39+
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
40+
41+
Documentation
42+
~~~~~~~~~~~~~
3343
Enhancements
3444
~~~~~~~~~~~~
3545

xarray/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,5 @@
3434
from . import tutorial
3535
from . import ufuncs
3636
from . import testing
37+
38+
from .core.common import ALL_DIMS

xarray/core/common.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,11 @@
1010
from . import duck_array_ops, dtypes, formatting, ops
1111
from .arithmetic import SupportsArithmetic
1212
from .pycompat import OrderedDict, basestring, dask_array_type, suppress
13-
from .utils import either_dict_or_kwargs, Frozen, SortedKeysDict
13+
from .utils import either_dict_or_kwargs, Frozen, SortedKeysDict, ReprObject
14+
15+
16+
# Used as a sentinel value to indicate a all dimensions
17+
ALL_DIMS = ReprObject('<all-dims>')
1418

1519

1620
class ImplementsArrayReduce(object):

xarray/core/dataset.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
from .. import conventions
1919
from .alignment import align
2020
from .common import (
21-
DataWithCoords, ImplementsDatasetReduce, _contains_datetime_like_objects)
21+
ALL_DIMS, DataWithCoords, ImplementsDatasetReduce,
22+
_contains_datetime_like_objects)
2223
from .coordinates import (
2324
DatasetCoordinates, Indexes, LevelCoordinatesSource,
2425
assert_coordinate_consistent, remap_label_indexers)
@@ -743,7 +744,7 @@ def copy(self, deep=False, data=None):
743744
Shallow copy versus deep copy
744745
745746
>>> da = xr.DataArray(np.random.randn(2, 3))
746-
>>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])},
747+
>>> ds = xr.Dataset({'foo': da, 'bar': ('x', [-1, 2])},
747748
coords={'x': ['one', 'two']})
748749
>>> ds.copy()
749750
<xarray.Dataset>
@@ -775,7 +776,7 @@ def copy(self, deep=False, data=None):
775776
foo (dim_0, dim_1) float64 7.0 0.3897 -1.862 -0.6091 -1.051 -0.3003
776777
bar (x) int64 -1 2
777778
778-
Changing the data using the ``data`` argument maintains the
779+
Changing the data using the ``data`` argument maintains the
779780
structure of the original object, but with the new data. Original
780781
object is unaffected.
781782
@@ -826,7 +827,7 @@ def copy(self, deep=False, data=None):
826827
# skip __init__ to avoid costly validation
827828
return self._construct_direct(variables, self._coord_names.copy(),
828829
self._dims.copy(), self._attrs_copy(),
829-
encoding=self.encoding)
830+
encoding=self.encoding)
830831

831832
def _subset_with_all_valid_coords(self, variables, coord_names, attrs):
832833
needed_dims = set()
@@ -2893,6 +2894,8 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
28932894
Dataset with this object's DataArrays replaced with new DataArrays
28942895
of summarized data and the indicated dimension(s) removed.
28952896
"""
2897+
if dim is ALL_DIMS:
2898+
dim = None
28962899
if isinstance(dim, basestring):
28972900
dims = set([dim])
28982901
elif dim is None:

xarray/core/groupby.py

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
from __future__ import absolute_import, division, print_function
22

33
import functools
4+
import warnings
45

56
import numpy as np
67
import pandas as pd
78

8-
from . import dtypes, duck_array_ops, nputils, ops
9+
from . import dtypes, duck_array_ops, nputils, ops, utils
910
from .arithmetic import SupportsArithmetic
1011
from .combine import concat
11-
from .common import ImplementsArrayReduce, ImplementsDatasetReduce
12+
from .common import ALL_DIMS, ImplementsArrayReduce, ImplementsDatasetReduce
1213
from .pycompat import integer_types, range, zip
1314
from .utils import hashable, maybe_wrap_array, peek_at, safe_cast_to_index
1415
from .variable import IndexVariable, Variable, as_variable
@@ -567,10 +568,39 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
567568
Array with summarized data and the indicated dimension(s)
568569
removed.
569570
"""
571+
if dim == DEFAULT_DIMS:
572+
dim = ALL_DIMS
573+
# TODO change this to dim = self._group_dim after
574+
# the deprecation process
575+
if self._obj.ndim > 1:
576+
warnings.warn(
577+
"Default reduction dimension will be changed to the "
578+
"grouped dimension after xarray 0.12. To silence this "
579+
"warning, pass dim=xarray.ALL_DIMS explicitly.",
580+
FutureWarning, stacklevel=2)
581+
570582
def reduce_array(ar):
571583
return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs)
572584
return self.apply(reduce_array, shortcut=shortcut)
573585

586+
# TODO remove the following class method and DEFAULT_DIMS after the
587+
# deprecation cycle
588+
@classmethod
589+
def _reduce_method(cls, func, include_skipna, numeric_only):
590+
if include_skipna:
591+
def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, skipna=None,
592+
keep_attrs=False, **kwargs):
593+
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
594+
skipna=skipna, allow_lazy=True, **kwargs)
595+
else:
596+
def wrapped_func(self, dim=DEFAULT_DIMS, axis=None,
597+
keep_attrs=False, **kwargs):
598+
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
599+
allow_lazy=True, **kwargs)
600+
return wrapped_func
601+
602+
603+
DEFAULT_DIMS = utils.ReprObject('<default-dims>')
574604

575605
ops.inject_reduce_methods(DataArrayGroupBy)
576606
ops.inject_binary_ops(DataArrayGroupBy)
@@ -649,10 +679,40 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
649679
Array with summarized data and the indicated dimension(s)
650680
removed.
651681
"""
682+
if dim == DEFAULT_DIMS:
683+
dim = ALL_DIMS
684+
# TODO change this to dim = self._group_dim after
685+
# the deprecation process. Do not forget to remove _reduce_method
686+
warnings.warn(
687+
"Default reduction dimension will be changed to the "
688+
"grouped dimension after xarray 0.12. To silence this "
689+
"warning, pass dim=xarray.ALL_DIMS explicitly.",
690+
FutureWarning, stacklevel=2)
691+
elif dim is None:
692+
dim = self._group_dim
693+
652694
def reduce_dataset(ds):
653695
return ds.reduce(func, dim, keep_attrs, **kwargs)
654696
return self.apply(reduce_dataset)
655697

698+
# TODO remove the following class method and DEFAULT_DIMS after the
699+
# deprecation cycle
700+
@classmethod
701+
def _reduce_method(cls, func, include_skipna, numeric_only):
702+
if include_skipna:
703+
def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False,
704+
skipna=None, **kwargs):
705+
return self.reduce(func, dim, keep_attrs, skipna=skipna,
706+
numeric_only=numeric_only, allow_lazy=True,
707+
**kwargs)
708+
else:
709+
def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False,
710+
**kwargs):
711+
return self.reduce(func, dim, keep_attrs,
712+
numeric_only=numeric_only, allow_lazy=True,
713+
**kwargs)
714+
return wrapped_func
715+
656716
def assign(self, **kwargs):
657717
"""Assign data variables by group.
658718

xarray/core/resample.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import absolute_import, division, print_function
22

33
from . import ops
4-
from .groupby import DataArrayGroupBy, DatasetGroupBy
4+
from .groupby import DataArrayGroupBy, DatasetGroupBy, DEFAULT_DIMS
55
from .pycompat import OrderedDict, dask_array_type
66

77
RESAMPLE_DIM = '__resample_dim__'
@@ -277,15 +277,14 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
277277
"""Reduce the items in this group by applying `func` along the
278278
pre-defined resampling dimension.
279279
280-
Note that `dim` is by default here and ignored if passed by the user;
281-
this ensures compatibility with the existing reduce interface.
282-
283280
Parameters
284281
----------
285282
func : function
286283
Function which can be called in the form
287284
`func(x, axis=axis, **kwargs)` to return the result of collapsing
288285
an np.ndarray over an integer valued axis.
286+
dim : str or sequence of str, optional
287+
Dimension(s) over which to apply `func`.
289288
keep_attrs : bool, optional
290289
If True, the datasets's attributes (`attrs`) will be copied from
291290
the original object to the new one. If False (default), the new
@@ -299,8 +298,11 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
299298
Array with summarized data and the indicated dimension(s)
300299
removed.
301300
"""
301+
if dim == DEFAULT_DIMS:
302+
dim = None
303+
302304
return super(DatasetResample, self).reduce(
303-
func, self._dim, keep_attrs, **kwargs)
305+
func, dim, keep_attrs, **kwargs)
304306

305307
def _interpolate(self, kind='linear'):
306308
"""Apply scipy.interpolate.interp1d along resampling dimension."""

xarray/core/variable.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,6 +1333,8 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
13331333
Array with summarized data and the indicated dimension(s)
13341334
removed.
13351335
"""
1336+
if dim is common.ALL_DIMS:
1337+
dim = None
13361338
if dim is not None and axis is not None:
13371339
raise ValueError("cannot supply both 'axis' and 'dim' arguments")
13381340

xarray/tests/test_dask.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,8 +385,8 @@ def test_groupby(self):
385385
u = self.eager_array
386386
v = self.lazy_array
387387

388-
expected = u.groupby('x').mean()
389-
actual = v.groupby('x').mean()
388+
expected = u.groupby('x').mean(xr.ALL_DIMS)
389+
actual = v.groupby('x').mean(xr.ALL_DIMS)
390390
self.assertLazyAndAllClose(expected, actual)
391391

392392
def test_groupby_first(self):

xarray/tests/test_dataarray.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import pickle
44
from copy import deepcopy
5+
from distutils.version import LooseVersion
56
from textwrap import dedent
67
import warnings
78

@@ -14,7 +15,7 @@
1415
DataArray, Dataset, IndexVariable, Variable, align, broadcast, set_options)
1516
from xarray.convert import from_cdms2
1617
from xarray.coding.times import CFDatetimeCoder, _import_cftime
17-
from xarray.core.common import full_like
18+
from xarray.core.common import full_like, ALL_DIMS
1819
from xarray.core.pycompat import OrderedDict, iteritems
1920
from xarray.tests import (
2021
ReturnItem, TestCase, assert_allclose, assert_array_equal, assert_equal,
@@ -2000,15 +2001,15 @@ def test_groupby_sum(self):
20002001
self.x[:, 10:].sum(),
20012002
self.x[:, 9:10].sum()]).T),
20022003
'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
2003-
assert_allclose(expected_sum_all, grouped.reduce(np.sum))
2004-
assert_allclose(expected_sum_all, grouped.sum())
2004+
assert_allclose(expected_sum_all, grouped.reduce(np.sum, dim=ALL_DIMS))
2005+
assert_allclose(expected_sum_all, grouped.sum(ALL_DIMS))
20052006

20062007
expected = DataArray([array['y'].values[idx].sum() for idx
20072008
in [slice(9), slice(10, None), slice(9, 10)]],
20082009
[['a', 'b', 'c']], ['abc'])
20092010
actual = array['y'].groupby('abc').apply(np.sum)
20102011
assert_allclose(expected, actual)
2011-
actual = array['y'].groupby('abc').sum()
2012+
actual = array['y'].groupby('abc').sum(ALL_DIMS)
20122013
assert_allclose(expected, actual)
20132014

20142015
expected_sum_axis1 = Dataset(
@@ -2019,6 +2020,27 @@ def test_groupby_sum(self):
20192020
assert_allclose(expected_sum_axis1, grouped.reduce(np.sum, 'y'))
20202021
assert_allclose(expected_sum_axis1, grouped.sum('y'))
20212022

2023+
def test_groupby_warning(self):
2024+
array = self.make_groupby_example_array()
2025+
grouped = array.groupby('y')
2026+
with pytest.warns(FutureWarning):
2027+
grouped.sum()
2028+
2029+
@pytest.mark.skipif(LooseVersion(xr.__version__) < LooseVersion('0.12'),
2030+
reason="not to forget the behavior change")
2031+
def test_groupby_sum_default(self):
2032+
array = self.make_groupby_example_array()
2033+
grouped = array.groupby('abc')
2034+
2035+
expected_sum_all = Dataset(
2036+
{'foo': Variable(['x', 'abc'],
2037+
np.array([self.x[:, :9].sum(axis=-1),
2038+
self.x[:, 10:].sum(axis=-1),
2039+
self.x[:, 9:10].sum(axis=-1)]).T),
2040+
'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
2041+
2042+
assert_allclose(expected_sum_all, grouped.sum())
2043+
20222044
def test_groupby_count(self):
20232045
array = DataArray(
20242046
[0, 0, np.nan, np.nan, 0, 0],
@@ -2099,9 +2121,9 @@ def test_groupby_math(self):
20992121
assert_identical(expected, actual)
21002122

21012123
grouped = array.groupby('abc')
2102-
expected_agg = (grouped.mean() - np.arange(3)).rename(None)
2124+
expected_agg = (grouped.mean(ALL_DIMS) - np.arange(3)).rename(None)
21032125
actual = grouped - DataArray(range(3), [('abc', ['a', 'b', 'c'])])
2104-
actual_agg = actual.groupby('abc').mean()
2126+
actual_agg = actual.groupby('abc').mean(ALL_DIMS)
21052127
assert_allclose(expected_agg, actual_agg)
21062128

21072129
with raises_regex(TypeError, 'only support binary ops'):
@@ -2175,7 +2197,7 @@ def test_groupby_multidim(self):
21752197
('lon', DataArray([5, 28, 23],
21762198
coords=[('lon', [30., 40., 50.])])),
21772199
('lat', DataArray([16, 40], coords=[('lat', [10., 20.])]))]:
2178-
actual_sum = array.groupby(dim).sum()
2200+
actual_sum = array.groupby(dim).sum(ALL_DIMS)
21792201
assert_identical(expected_sum, actual_sum)
21802202

21812203
def test_groupby_multidim_apply(self):

xarray/tests/test_dataset.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import xarray as xr
1515
from xarray import (
1616
DataArray, Dataset, IndexVariable, MergeError, Variable, align, backends,
17-
broadcast, open_dataset, set_options)
17+
broadcast, open_dataset, set_options, ALL_DIMS)
1818
from xarray.core import indexing, npcompat, utils
1919
from xarray.core.common import full_like
2020
from xarray.core.pycompat import (
@@ -2648,20 +2648,28 @@ def test_groupby_reduce(self):
26482648

26492649
expected = data.mean('y')
26502650
expected['yonly'] = expected['yonly'].variable.set_dims({'x': 3})
2651-
actual = data.groupby('x').mean()
2651+
actual = data.groupby('x').mean(ALL_DIMS)
26522652
assert_allclose(expected, actual)
26532653

26542654
actual = data.groupby('x').mean('y')
26552655
assert_allclose(expected, actual)
26562656

26572657
letters = data['letters']
2658-
expected = Dataset({'xy': data['xy'].groupby(letters).mean(),
2658+
expected = Dataset({'xy': data['xy'].groupby(letters).mean(ALL_DIMS),
26592659
'xonly': (data['xonly'].mean().variable
26602660
.set_dims({'letters': 2})),
26612661
'yonly': data['yonly'].groupby(letters).mean()})
2662-
actual = data.groupby('letters').mean()
2662+
actual = data.groupby('letters').mean(ALL_DIMS)
26632663
assert_allclose(expected, actual)
26642664

2665+
def test_groupby_warn(self):
2666+
data = Dataset({'xy': (['x', 'y'], np.random.randn(3, 4)),
2667+
'xonly': ('x', np.random.randn(3)),
2668+
'yonly': ('y', np.random.randn(4)),
2669+
'letters': ('y', ['a', 'a', 'b', 'b'])})
2670+
with pytest.warns(FutureWarning):
2671+
data.groupby('x').mean()
2672+
26652673
def test_groupby_math(self):
26662674
def reorder_dims(x):
26672675
return x.transpose('dim1', 'dim2', 'dim3', 'time')
@@ -2716,7 +2724,7 @@ def test_groupby_math_virtual(self):
27162724
ds = Dataset({'x': ('t', [1, 2, 3])},
27172725
{'t': pd.date_range('20100101', periods=3)})
27182726
grouped = ds.groupby('t.day')
2719-
actual = grouped - grouped.mean()
2727+
actual = grouped - grouped.mean(ALL_DIMS)
27202728
expected = Dataset({'x': ('t', [0, 0, 0])},
27212729
ds[['t', 't.day']])
27222730
assert_identical(actual, expected)
@@ -2725,18 +2733,17 @@ def test_groupby_nan(self):
27252733
# nan should be excluded from groupby
27262734
ds = Dataset({'foo': ('x', [1, 2, 3, 4])},
27272735
{'bar': ('x', [1, 1, 2, np.nan])})
2728-
actual = ds.groupby('bar').mean()
2736+
actual = ds.groupby('bar').mean(ALL_DIMS)
27292737
expected = Dataset({'foo': ('bar', [1.5, 3]), 'bar': [1, 2]})
27302738
assert_identical(actual, expected)
27312739

27322740
def test_groupby_order(self):
27332741
# groupby should preserve variables order
2734-
27352742
ds = Dataset()
27362743
for vn in ['a', 'b', 'c']:
27372744
ds[vn] = DataArray(np.arange(10), dims=['t'])
27382745
data_vars_ref = list(ds.data_vars.keys())
2739-
ds = ds.groupby('t').mean()
2746+
ds = ds.groupby('t').mean(ALL_DIMS)
27402747
data_vars = list(ds.data_vars.keys())
27412748
assert data_vars == data_vars_ref
27422749
# coords are now at the end of the list, so the test below fails

0 commit comments

Comments
 (0)