Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use _unstack_once for valid dask and sparse versions #5315

Merged
merged 26 commits into from
May 17, 2021
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
8497d91
optional import versions to pycompat
Illviljan May 15, 2021
2c11948
Update indexing.py
Illviljan May 15, 2021
aeea5cc
move dtypes to avoid circular import
Illviljan May 15, 2021
8fa535c
Update pycompat.py
Illviljan May 15, 2021
ec6c301
faster unstacking
Illviljan May 15, 2021
3f2784d
Update dataset.py
Illviljan May 15, 2021
de15c39
Update dataset.py
Illviljan May 15, 2021
682922f
have to check that the array type is in use
Illviljan May 15, 2021
dba4693
Update dataset.py
Illviljan May 15, 2021
bb74dc9
sparse arg requires the slow path?
Illviljan May 15, 2021
8f15a28
cupy.__version__
Illviljan May 15, 2021
cd39b67
Merge branch 'Illviljan-version_to_pycompat' into Illviljan-faster_un…
Illviljan May 15, 2021
c2ce0cf
test pint_array_type
Illviljan May 15, 2021
9992824
Update dataset.py
Illviljan May 15, 2021
95fc10e
Add pint check in pycompat
Illviljan May 15, 2021
2597619
Update pycompat.py
Illviljan May 15, 2021
cf80360
revert pint test
Illviljan May 15, 2021
b41c113
import whole dtypes
Illviljan May 16, 2021
df425a4
Merge branch 'Illviljan-version_to_pycompat' into Illviljan-faster_un…
Illviljan May 16, 2021
a31754b
Test turning off the ndarray check
Illviljan May 16, 2021
207609b
sparse and pint doesn't work. Switch to a restrictive if
Illviljan May 16, 2021
7bb5aa5
Update dataset.py
Illviljan May 16, 2021
0d08e35
Merge branch 'master' into Illviljan-faster_unstacking
Illviljan May 17, 2021
d5b31ae
Add back some comments and add some relevant issue links
Illviljan May 17, 2021
35db4a8
lint
Illviljan May 17, 2021
7ee325d
Update dataset.py
Illviljan May 17, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions xarray/core/dask_array_compat.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import warnings
from distutils.version import LooseVersion

import numpy as np

from .pycompat import dask_version

try:
import dask.array as da
from dask import __version__ as dask_version
except ImportError:
dask_version = "0.0.0"
da = None


Expand Down Expand Up @@ -57,7 +56,7 @@ def pad(array, pad_width, mode="constant", **kwargs):
return padded


if LooseVersion(dask_version) > LooseVersion("2.30.0"):
if dask_version > "2.30.0":
ensure_minimum_chunksize = da.overlap.ensure_minimum_chunksize
else:

Expand Down Expand Up @@ -114,7 +113,7 @@ def ensure_minimum_chunksize(size, chunks):
return tuple(output)


if LooseVersion(dask_version) > LooseVersion("2021.03.0"):
if dask_version > "2021.03.0":
sliding_window_view = da.lib.stride_tricks.sliding_window_view
else:

Expand Down Expand Up @@ -180,7 +179,7 @@ def sliding_window_view(x, window_shape, axis=None):
axis=axis,
)
# map_overlap's signature changed in https://github.com/dask/dask/pull/6165
if LooseVersion(dask_version) > "2.18.0":
if dask_version > "2.18.0":
return map_overlap(_np_sliding_window_view, x, align_arrays=False, **kwargs)
else:
return map_overlap(x, _np_sliding_window_view, **kwargs)
30 changes: 20 additions & 10 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,12 @@
)
from .missing import get_clean_interp_index
from .options import OPTIONS, _get_keep_attrs
from .pycompat import is_duck_dask_array, sparse_array_type
from .pycompat import (
dask_version,
is_duck_dask_array,
sparse_array_type,
sparse_version,
)
from .utils import (
Default,
Frozen,
Expand Down Expand Up @@ -4030,17 +4035,22 @@ def unstack(
for dim in dims:

if (
# Dask arrays don't support assignment by index, which the fast unstack
# function requires.
# https://github.com/pydata/xarray/pull/4746#issuecomment-753282125
any(is_duck_dask_array(v.data) for v in self.variables.values())
# Sparse doesn't currently support (though we could special-case
# it)
# Dask arrays support assignment by index,
# https://github.com/dask/dask/pull/7393
(
dask_version < "2021.04.0"
and any(is_duck_dask_array(v.data) for v in self.variables.values())
)
# Sparse now supports kwargs in full_like,
# https://github.com/pydata/sparse/issues/422
or any(
isinstance(v.data, sparse_array_type)
for v in self.variables.values()
or (
sparse_version < "0.11.2"
and any(
isinstance(v.data, sparse_array_type)
for v in self.variables.values()
)
)
# Shifting the arrays to sparse requires _unstack_full_reindex:
Illviljan marked this conversation as resolved.
Show resolved Hide resolved
or sparse
# Until https://github.com/pydata/xarray/pull/4751 is resolved,
# we check explicitly whether it's a numpy array. Once that is
Expand Down
11 changes: 2 additions & 9 deletions xarray/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,15 @@
import functools
import operator
from collections import defaultdict
from distutils.version import LooseVersion
from typing import Any, Callable, Iterable, List, Sequence, Tuple, Union

import numpy as np
import pandas as pd

try:
import dask

DASK_VERSION = LooseVersion(dask.__version__)
except ModuleNotFoundError:
DASK_VERSION = LooseVersion("0")

from . import duck_array_ops, nputils, utils
from .pycompat import (
dask_array_type,
dask_version,
integer_types,
is_duck_dask_array,
sparse_array_type,
Expand Down Expand Up @@ -1393,7 +1386,7 @@ def __getitem__(self, key):
return value

def __setitem__(self, key, value):
if DASK_VERSION >= "2021.04.1":
if dask_version >= "2021.04.1":
if isinstance(key, BasicIndexer):
self.array[key.tuple] = value
elif isinstance(key, VectorizedIndexer):
Expand Down
10 changes: 10 additions & 0 deletions xarray/core/pycompat.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
from distutils.version import LooseVersion

import numpy as np

from .utils import is_duck_array

integer_types = (int, np.integer)

try:
import dask
import dask.array
from dask.base import is_dask_collection

dask_version = LooseVersion(dask.__version__)

# solely for isinstance checks
dask_array_type = (dask.array.Array,)

Expand All @@ -16,6 +21,7 @@ def is_duck_dask_array(x):


except ImportError: # pragma: no cover
dask_version = LooseVersion("0.0.0")
dask_array_type = ()
is_duck_dask_array = lambda _: False
is_dask_collection = lambda _: False
Expand All @@ -24,14 +30,18 @@ def is_duck_dask_array(x):
# solely for isinstance checks
import sparse

sparse_version = LooseVersion(sparse.__version__)
sparse_array_type = (sparse.SparseArray,)
except ImportError: # pragma: no cover
sparse_version = LooseVersion("0.0.0")
sparse_array_type = ()

try:
    # cupy is an optional dependency; it is imported here solely so that
    # its array type can be used in isinstance checks and its installed
    # version can be compared against.
    import cupy

    # Read the version from cupy itself. Using ``sparse.__version__`` here
    # would report the wrong package's version, and would raise NameError
    # (not caught below) whenever sparse is not installed.
    cupy_version = LooseVersion(cupy.__version__)
    cupy_array_type = (cupy.ndarray,)
except ImportError:  # pragma: no cover
    # cupy is unavailable: expose a "0.0.0" placeholder version and an empty
    # tuple, so ``isinstance(x, cupy_array_type)`` is always False.
    cupy_version = LooseVersion("0.0.0")
    cupy_array_type = ()
5 changes: 2 additions & 3 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@
import numpy as np
import pandas as pd

from . import dtypes

K = TypeVar("K")
V = TypeVar("V")
T = TypeVar("T")
Expand Down Expand Up @@ -83,9 +81,10 @@ def maybe_coerce_to_str(index, original_coords):

pd.Index uses object-dtype to store str - try to avoid this for coords
"""
from .dtypes import result_type as dtypes_result_type

try:
result_type = dtypes.result_type(*original_coords)
result_type = dtypes_result_type(*original_coords)
except TypeError:
pass
else:
Expand Down