Skip to content

Remove the old syntax for resample. #2541

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ Breaking changes
includes only data variables.
- ``DataArray.__contains__`` (used by Python's ``in`` operator) now checks
array data, not coordinates.
- The old resample syntax from before xarray 0.10, e.g.,
``data.resample('1D', dim='time', how='mean')``, is no longer supported and will
raise an error in most cases. You need to use the new resample syntax
instead, e.g., ``data.resample(time='1D').mean()`` or
``data.resample({'time': '1D'}).mean()``.
- Xarray's storage backends now automatically open and close files when
necessary, rather than requiring opening a file with ``autoclose=True``. A
global least-recently-used cache is used to store open files; the default
Expand Down Expand Up @@ -108,6 +113,10 @@ Enhancements
python driver and *ecCodes* C-library. (:issue:`2475`)
By `Alessandro Amici <https://github.com/alexamici>`_,
sponsored by `ECMWF <https://github.com/ecmwf>`_.
- Resample now supports a dictionary mapping from dimension to frequency as
its first argument, e.g., ``data.resample({'time': '1D'}).mean()``. This is
consistent with other xarray functions that accept either dictionaries or
keyword arguments. By `Stephan Hoyer <https://github.com/shoyer>`_.

Bug fixes
~~~~~~~~~
Expand Down
94 changes: 21 additions & 73 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
Set the labels at the center of the window.
**dim_kwargs : optional
The keyword arguments form of ``dim``.
One of dim or dim_kwarg must be provided.
One of dim or dim_kwargs must be provided.

Returns
-------
Expand Down Expand Up @@ -591,15 +591,17 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
return self._rolling_cls(self, dim, min_periods=min_periods,
center=center)

def resample(self, freq=None, dim=None, how=None, skipna=None,
closed=None, label=None, base=0, keep_attrs=None, **indexer):
def resample(self, indexer=None, skipna=None, closed=None, label=None,
base=0, keep_attrs=None, **indexer_kwargs):
"""Returns a Resample object for performing resampling operations.

Handles both downsampling and upsampling. If any intervals contain no
values from the original object, they will be given the value ``NaN``.

Parameters
----------
indexer : {dim: freq}, optional
Mapping from the dimension name to resample frequency.
skipna : bool, optional
Whether to skip missing values when aggregating in downsampling.
closed : 'left' or 'right', optional
Expand All @@ -614,9 +616,9 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
object will be returned without attributes.
**indexer : {dim: freq}
Dictionary with a key indicating the dimension name to resample
over and a value corresponding to the resampling frequency.
**indexer_kwargs : {dim: freq}
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.

Returns
-------
Expand Down Expand Up @@ -664,30 +666,24 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

if dim is not None:
if how is None:
how = 'mean'
return self._resample_immediately(freq, dim, how, skipna, closed,
label, base, keep_attrs)
# note: the second argument (now 'skipna') used to be 'dim'
if ((skipna is not None and not isinstance(skipna, bool))
or ('how' in indexer_kwargs and 'how' not in self.dims)
or ('dim' in indexer_kwargs and 'dim' not in self.dims)):
raise TypeError('resample() no longer supports the `how` or '
'`dim` arguments. Instead call methods on resample '
"objects, e.g., data.resample(time='1D').mean()")

indexer = either_dict_or_kwargs(indexer, indexer_kwargs, 'resample')

if (how is not None) and indexer:
raise TypeError("If passing an 'indexer' then 'dim' "
"and 'how' should not be used")

# More than one indexer is ambiguous, but we do in fact need one if
# "dim" was not provided, until the old API is fully deprecated
if len(indexer) != 1:
raise ValueError(
"Resampling only supported along single dimensions."
)
dim, freq = indexer.popitem()

if isinstance(dim, basestring):
dim_name = dim
dim = self[dim]
else:
raise TypeError("Dimension name should be a string; "
"was passed %r" % dim)
dim_name = dim
dim_coord = self[dim]

if isinstance(self.indexes[dim_name], CFTimeIndex):
raise NotImplementedError(
Expand All @@ -702,63 +698,15 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
'errors.'
)

group = DataArray(dim, [(dim.dims, dim)], name=RESAMPLE_DIM)
group = DataArray(dim_coord, coords=dim_coord.coords,
dims=dim_coord.dims, name=RESAMPLE_DIM)
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base)
resampler = self._resample_cls(self, group=group, dim=dim_name,
grouper=grouper,
resample_dim=RESAMPLE_DIM)

return resampler

def _resample_immediately(self, freq, dim, how, skipna,
                          closed, label, base, keep_attrs):
    """Legacy eager form of ``.resample()``.

    Emits a ``FutureWarning`` that points at the deferred syntax, groups
    this object along ``dim`` using a ``pd.Grouper`` built from ``freq``,
    ``how``, ``closed``, ``label`` and ``base``, and applies ``how`` right
    away, returning the reduced object rather than a resampler.

    ``how`` is either the name of a method on the groupby object or a
    non-string object that is handed to the groupby's ``reduce``.

    Raises
    ------
    NotImplementedError
        If the index for ``dim`` is a ``CFTimeIndex``.
    """
    from .dataarray import DataArray
    from ..coding.cftimeindex import CFTimeIndex

    RESAMPLE_DIM = '__resample_dim__'

    message = (
        "\n.resample() has been modified to defer "
        "calculations. Instead of passing 'dim' and "
        "how=\"{how}\", instead consider using "
        ".resample({dim}=\"{freq}\").{how}('{dim}') "
    ).format(dim=dim, freq=freq, how=how)
    warnings.warn(message, FutureWarning, stacklevel=3)

    # The index lookup uses ``dim`` exactly as passed in, before any
    # conversion of a name into a coordinate object.
    index = self.indexes[dim]
    if isinstance(index, CFTimeIndex):
        raise NotImplementedError(
            'Resample is currently not supported along a dimension '
            'indexed by a CFTimeIndex. For certain kinds of downsampling '
            'it may be possible to work around this by converting your '
            'time index to a DatetimeIndex using '
            'CFTimeIndex.to_datetimeindex. Use caution when doing this '
            'however, because switching to a DatetimeIndex from a '
            'CFTimeIndex with a non-standard calendar entails a change '
            'in the calendar type, which could lead to subtle and silent '
            'errors.'
        )

    # A string names a coordinate on this object; anything else is used
    # as the coordinate itself.
    coord = self[dim] if isinstance(dim, basestring) else dim

    group = DataArray(coord, [(coord.dims, coord)], name=RESAMPLE_DIM)
    grouper = pd.Grouper(freq=freq, how=how, closed=closed, label=label,
                         base=base)
    grouped = self._groupby_cls(self, group, grouper=grouper)

    if not isinstance(how, basestring):
        reduced = grouped.reduce(how, dim=coord.name,
                                 keep_attrs=keep_attrs)
    else:
        method = getattr(grouped, how)
        options = {'keep_attrs': keep_attrs}
        # 'first' and 'last' are called without ``dim``; 'count' is
        # called without ``skipna``; every other method gets both.
        if how not in ('first', 'last'):
            options['dim'] = coord.name
        if how != 'count':
            options['skipna'] = skipna
        reduced = method(**options)

    return reduced.rename({RESAMPLE_DIM: coord.name})

def where(self, cond, other=dtypes.NA, drop=False):
"""Filter elements from this object according to a condition.

Expand Down
53 changes: 12 additions & 41 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2356,53 +2356,24 @@ def test_resample_drop_nondim_coords(self):
actual = array.resample(time="1H").interpolate('linear')
assert 'tc' not in actual.coords

def test_resample_old_vs_new_api(self):
def test_resample_keep_attrs(self):
times = pd.date_range('2000-01-01', freq='6H', periods=10)
array = DataArray(np.ones(10), [('time', times)])
array.attrs['meta'] = 'data'

# Simple mean
with pytest.warns(FutureWarning):
old_mean = array.resample('1D', 'time', how='mean')
new_mean = array.resample(time='1D').mean()
assert_identical(old_mean, new_mean)

# Mean, while keeping attributes
attr_array = array.copy()
attr_array.attrs['meta'] = 'data'

with pytest.warns(FutureWarning):
old_mean = attr_array.resample('1D', dim='time', how='mean',
keep_attrs=True)
new_mean = attr_array.resample(time='1D').mean(keep_attrs=True)
assert old_mean.attrs == new_mean.attrs
assert_identical(old_mean, new_mean)
result = array.resample(time='1D').mean(keep_attrs=True)
expected = DataArray([1, 1, 1], [('time', times[::4])],
attrs=array.attrs)
assert_identical(result, expected)

# Mean, with NaN to skip
nan_array = array.copy()
nan_array[1] = np.nan
def test_resample_skipna(self):
times = pd.date_range('2000-01-01', freq='6H', periods=10)
array = DataArray(np.ones(10), [('time', times)])
array[1] = np.nan

with pytest.warns(FutureWarning):
old_mean = nan_array.resample('1D', 'time', how='mean',
skipna=False)
new_mean = nan_array.resample(time='1D').mean(skipna=False)
result = array.resample(time='1D').mean(skipna=False)
expected = DataArray([np.nan, 1, 1], [('time', times[::4])])
assert_identical(old_mean, expected)
assert_identical(new_mean, expected)

# Try other common resampling methods
resampler = array.resample(time='1D')
for method in ['mean', 'median', 'sum', 'first', 'last', 'count']:
# Discard attributes on the call using the new api to match
# convention from old api
new_api = getattr(resampler, method)(keep_attrs=False)
with pytest.warns(FutureWarning):
old_api = array.resample('1D', dim='time', how=method)
assert_identical(new_api, old_api)
for method in [np.mean, np.sum, np.max, np.min]:
new_api = resampler.reduce(method)
with pytest.warns(FutureWarning):
old_api = array.resample('1D', dim='time', how=method)
assert_identical(new_api, old_api)
assert_identical(result, expected)

def test_upsample(self):
times = pd.date_range('2000-01-01', freq='6H', periods=5)
Expand Down
19 changes: 9 additions & 10 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2853,22 +2853,21 @@ def test_resample_drop_nondim_coords(self):
actual = ds.resample(time="1H").interpolate('linear')
assert 'tc' not in actual.coords

def test_resample_old_vs_new_api(self):
def test_resample_old_api(self):

times = pd.date_range('2000-01-01', freq='6H', periods=10)
ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),
'bar': ('time', np.random.randn(10), {'meta': 'data'}),
'time': times})
ds.attrs['dsmeta'] = 'dsdata'

for method in ['mean', 'sum', 'count', 'first', 'last']:
resampler = ds.resample(time='1D')
# Discard attributes on the call using the new api to match
# convention from old api
new_api = getattr(resampler, method)(keep_attrs=False)
with pytest.warns(FutureWarning):
old_api = ds.resample('1D', dim='time', how=method)
assert_identical(new_api, old_api)
with raises_regex(TypeError, r'resample\(\) no longer supports'):
ds.resample('1D', 'time')

with raises_regex(TypeError, r'resample\(\) no longer supports'):
ds.resample('1D', dim='time', how='mean')

with raises_regex(TypeError, r'resample\(\) no longer supports'):
ds.resample('1D', dim='time')

def test_to_array(self):
ds = Dataset(OrderedDict([('a', 1), ('b', ('x', [1, 2, 3]))]),
Expand Down