Skip to content

Fix rolling window operations with dask when bottleneck is installed #3040

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ Enhancements
Bug fixes
~~~~~~~~~

- Rolling operations on xarray objects containing dask arrays could silently
compute the incorrect result or use large amounts of memory (:issue:`2940`).
By `Stephan Hoyer <https://github.com/shoyer>`_.
- Don't set encoding attributes on bounds variables when writing to netCDF.
(:issue:`2921`)
By `Deepak Cherian <https://github.com/dcherian>`_.
Expand Down
75 changes: 0 additions & 75 deletions xarray/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,6 @@
NAN_REDUCE_METHODS = ['argmax', 'argmin', 'max', 'min', 'mean', 'prod', 'sum',
'std', 'var', 'median']
NAN_CUM_METHODS = ['cumsum', 'cumprod']
BOTTLENECK_ROLLING_METHODS = {'move_sum': 'sum', 'move_mean': 'mean',
'move_std': 'std', 'move_min': 'min',
'move_max': 'max', 'move_var': 'var',
'move_argmin': 'argmin', 'move_argmax': 'argmax',
'move_median': 'median'}
# TODO: wrap take, dot, sort


Expand Down Expand Up @@ -103,20 +98,6 @@
If fewer than min_count non-NA values are present the result will
be NA. New in version 0.10.8: Added with the default being None."""

_ROLLING_REDUCE_DOCSTRING_TEMPLATE = """\
Reduce this {da_or_ds}'s data windows by applying `{name}` along its dimension.

Parameters
----------
**kwargs : dict
Additional keyword arguments passed on to `{name}`.

Returns
-------
reduced : {da_or_ds}
New {da_or_ds} object with `{name}` applied along its rolling dimnension.
"""

_COARSEN_REDUCE_DOCSTRING_TEMPLATE = """\
Coarsen this object by applying `{name}` along its dimensions.

Expand Down Expand Up @@ -236,13 +217,6 @@ def func(self, *args, **kwargs):
return func


def rolling_count(rolling):

rolling_count = rolling._counts()
enough_periods = rolling_count >= rolling._min_periods
return rolling_count.where(enough_periods)


def inject_reduce_methods(cls):
methods = ([(name, getattr(duck_array_ops, 'array_%s' % name), False)
for name in REDUCE_METHODS] +
Expand Down Expand Up @@ -340,55 +314,6 @@ def inject_all_ops_and_reduce_methods(cls, priority=50, array_only=True):
inject_cum_methods(cls)


def inject_bottleneck_rolling_methods(cls):
# standard numpy reduce methods
methods = [(name, getattr(duck_array_ops, name))
for name in NAN_REDUCE_METHODS]
for name, f in methods:
func = cls._reduce_method(f)
func.__name__ = name
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__, da_or_ds='DataArray')
setattr(cls, name, func)

# bottleneck doesn't offer rolling_count, so we construct it ourselves
func = rolling_count
func.__name__ = 'count'
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__, da_or_ds='DataArray')
setattr(cls, 'count', func)

# bottleneck rolling methods
if not has_bottleneck:
return

for bn_name, method_name in BOTTLENECK_ROLLING_METHODS.items():
f = getattr(bn, bn_name)
func = cls._bottleneck_reduce(f)
func.__name__ = method_name
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__, da_or_ds='DataArray')
setattr(cls, method_name, func)


def inject_datasetrolling_methods(cls):
# standard numpy reduce methods
methods = [(name, getattr(duck_array_ops, name))
for name in NAN_REDUCE_METHODS]
for name, f in methods:
func = cls._reduce_method(f)
func.__name__ = name
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__, da_or_ds='Dataset')
setattr(cls, name, func)
# bottleneck doesn't offer rolling_count, so we construct it ourselves
func = rolling_count
func.__name__ = 'count'
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__, da_or_ds='Dataset')
setattr(cls, 'count', func)


def inject_coarsen_methods(cls):
# standard numpy reduce methods
methods = [(name, getattr(duck_array_ops, name))
Expand Down
Loading