Skip to content

Vectorized indexing with cache=False #1897

@jhamman

Description

@jhamman

Code Sample, a copy-pastable example if possible

import numpy as np
import xarray as xr
n_times = 4; n_lats = 10; n_lons = 15
n_points = 4

ds = xr.Dataset({'test_var': (['time', 'latitude', 'longitude'],
                              np.random.random((n_times, n_lats, n_lons)))})
ds.to_netcdf('test.nc')

rand_lons = xr.Variable('points', np.random.randint(0, high=n_lons, size=n_points))
rand_lats = xr.Variable('points', np.random.randint(0, high=n_lats, size=n_points))

ds = xr.open_dataset('test.nc', cache=False)
points = ds['test_var'][:, rand_lats, rand_lons]

yields:

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-7-f16e4cae9456> in <module>()
     12 
     13 ds = xr.open_dataset('test.nc', cache=False)
---> 14 points = ds['test_var'][:, rand_lats, rand_lons]

~/anaconda/envs/pangeo/lib/python3.6/site-packages/xarray/core/dataarray.py in __getitem__(self, key)
    478         else:
    479             # xarray-style array indexing
--> 480             return self.isel(**self._item_key_to_dict(key))
    481 
    482     def __setitem__(self, key, value):

~/anaconda/envs/pangeo/lib/python3.6/site-packages/xarray/core/dataarray.py in isel(self, drop, **indexers)
    759         DataArray.sel
    760         """
--> 761         ds = self._to_temp_dataset().isel(drop=drop, **indexers)
    762         return self._from_temp_dataset(ds)
    763 

~/anaconda/envs/pangeo/lib/python3.6/site-packages/xarray/core/dataset.py in isel(self, drop, **indexers)
   1390         for name, var in iteritems(self._variables):
   1391             var_indexers = {k: v for k, v in indexers_list if k in var.dims}
-> 1392             new_var = var.isel(**var_indexers)
   1393             if not (drop and name in var_indexers):
   1394                 variables[name] = new_var

~/anaconda/envs/pangeo/lib/python3.6/site-packages/xarray/core/variable.py in isel(self, **indexers)
    851             if dim in indexers:
    852                 key[i] = indexers[dim]
--> 853         return self[tuple(key)]
    854 
    855     def squeeze(self, dim=None):

~/anaconda/envs/pangeo/lib/python3.6/site-packages/xarray/core/variable.py in __getitem__(self, key)
    620         """
    621         dims, indexer, new_order = self._broadcast_indexes(key)
--> 622         data = as_indexable(self._data)[indexer]
    623         if new_order:
    624             data = np.moveaxis(data, range(len(new_order)), new_order)

~/anaconda/envs/pangeo/lib/python3.6/site-packages/xarray/core/indexing.py in __getitem__(self, key)
    554 
    555     def __getitem__(self, key):
--> 556         return type(self)(_wrap_numpy_scalars(self.array[key]))
    557 
    558     def __setitem__(self, key, value):

~/anaconda/envs/pangeo/lib/python3.6/site-packages/xarray/core/indexing.py in __getitem__(self, indexer)
    521 
    522     def __getitem__(self, indexer):
--> 523         return type(self)(self.array, self._updated_key(indexer))
    524 
    525     def __setitem__(self, key, value):

~/anaconda/envs/pangeo/lib/python3.6/site-packages/xarray/core/indexing.py in _updated_key(self, new_key)
    491                 'Vectorized indexing for {} is not implemented. Load your '
    492                 'data first with .load() or .compute(), or disable caching by '
--> 493                 'setting cache=False in open_dataset.'.format(type(self)))
    494 
    495         iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim))

NotImplementedError: Vectorized indexing for <class 'xarray.core.indexing.LazilyIndexedArray'> is not implemented. Load your data first with .load() or .compute(), or disable caching by setting cache=False in open_dataset.

Problem description

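Raising a NotImplementedError here is fine, but the message instructs the user to "disable caching by setting cache=False in open_dataset", which I have already done. So my questions are: 1) should we expect vectorized indexing to work with cache=False, and 2) if not, should the error message be changed so that it no longer recommends a setting that is already in effect?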

Expected Output

Ideally, we can get the same behavior as:

ds = xr.open_dataset('test2.nc', cache=False).load()
points = ds['test_var'][:, rand_lats, rand_lons]

<xarray.DataArray 'test_var' (time: 4, points: 4)>
array([[0.939469, 0.406885, 0.939469, 0.759075],
       [0.470116, 0.585546, 0.470116, 0.37833 ],
       [0.274321, 0.648218, 0.274321, 0.383391],
       [0.754121, 0.078878, 0.754121, 0.903788]])
Dimensions without coordinates: time, points

without needing to use .load()

Output of xr.show_versions()

INSTALLED VERSIONS
------------------
commit: None
python: 3.6.4.final.0
python-bits: 64
OS: Linux
OS-release: 3.10.0-693.5.2.el7.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8

xarray: 0.10.0+dev55.g1d32399
pandas: 0.22.0
numpy: 1.14.0
scipy: 1.0.0
netCDF4: 1.3.1
h5netcdf: 0.5.0
h5py: 2.7.1
Nio: None
zarr: None
bottleneck: 1.2.1
cyordereddict: None
dask: 0.16.1
distributed: 1.20.2
matplotlib: 2.1.2
cartopy: 0.15.1
seaborn: 0.8.1
setuptools: 38.4.0
pip: 9.0.1
conda: None
pytest: 3.4.0
IPython: 6.2.1
sphinx: None

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions