-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Description
What happened?
This is a little odd: if I use TimeResampler("ME") to rechunk data
that only has January-March, it introduces unknown chunks, causing Dask to raise an error.
What did you expect to happen?
Just calculate the chunks for January-March and ignore the missing months.
cc @dcherian is this an expected limitation?
Minimal Complete Verifiable Example
import fsspec
import xarray as xr
from xarray.groupers import TimeResampler
ds = xr.open_zarr(
fsspec.get_mapper("s3://noaa-nwm-retrospective-2-1-zarr-pds/rtout.zarr", anon=True),
consolidated=True,
)
# Slice for subset of years and months
subset = ds.zwattablrt.sel(time=slice("2001", "2002"))
# removing this line makes it work
subset = subset.sel(time=subset.time.dt.month.isin((1, 2, 3)))
mean_rechunked_cohorts = subset.chunk(time=TimeResampler("ME"))
MVCE confirmation
- Minimal example — the example is as focused as reasonably possible to demonstrate the underlying issue in xarray.
- Complete example — the example is self-contained, including all data and the text of any traceback.
- Verifiable example — the example copy & pastes into an IPython prompt or Binder notebook, returning the result.
- New issue — a search of GitHub Issues suggests this is not a duplicate.
- Recent environment — the issue occurs with the latest version of xarray and its dependencies.
Relevant log output
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[4], line 3
1 subset = ds.zwattablrt.sel(time=slice("2001", "2002"))
2 subset = subset.sel(time=subset.time.dt.month.isin((1, 2, 3)))
----> 3 mean_rechunked_cohorts = subset.chunk(time=TimeResampler("ME"))#.groupby("time.month").mean()
4 #mean_rechunked_cohorts.persist()
File ~/mambaforge/envs/dask-dev/lib/python3.11/site-packages/xarray/util/deprecation_helpers.py:115, in _deprecate_positional_args.<locals>._decorator.<locals>.inner(*args, **kwargs)
111 kwargs.update({name: arg for name, arg in zip_args})
113 return func(*args[:-n_extra_args], **kwargs)
--> 115 return func(*args, **kwargs)
File ~/mambaforge/envs/dask-dev/lib/python3.11/site-packages/xarray/core/dataarray.py:1438, in DataArray.chunk(self, chunks, name_prefix, token, lock, inline_array, chunked_array_type, from_array_kwargs, **chunks_kwargs)
1435 else:
1436 chunk_mapping = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk")
-> 1438 ds = self._to_temp_dataset().chunk(
1439 chunk_mapping,
1440 name_prefix=name_prefix,
1441 token=token,
1442 lock=lock,
1443 inline_array=inline_array,
1444 chunked_array_type=chunked_array_type,
1445 from_array_kwargs=from_array_kwargs,
1446 )
1447 return self._from_temp_dataset(ds)
File ~/mambaforge/envs/dask-dev/lib/python3.11/site-packages/xarray/core/dataset.py:2779, in Dataset.chunk(self, chunks, name_prefix, token, lock, inline_array, chunked_array_type, from_array_kwargs, **chunks_kwargs)
2776 if from_array_kwargs is None:
2777 from_array_kwargs = {}
-> 2779 variables = {
2780 k: _maybe_chunk(
2781 k,
2782 v,
2783 chunks_mapping_ints,
2784 token,
2785 lock,
2786 name_prefix,
2787 inline_array=inline_array,
2788 chunked_array_type=chunkmanager,
2789 from_array_kwargs=from_array_kwargs.copy(),
2790 )
2791 for k, v in self.variables.items()
2792 }
2793 return self._replace(variables)
File ~/mambaforge/envs/dask-dev/lib/python3.11/site-packages/xarray/core/dataset.py:2780, in <dictcomp>(.0)
2776 if from_array_kwargs is None:
2777 from_array_kwargs = {}
2779 variables = {
-> 2780 k: _maybe_chunk(
2781 k,
2782 v,
2783 chunks_mapping_ints,
2784 token,
2785 lock,
2786 name_prefix,
2787 inline_array=inline_array,
2788 chunked_array_type=chunkmanager,
2789 from_array_kwargs=from_array_kwargs.copy(),
2790 )
2791 for k, v in self.variables.items()
2792 }
2793 return self._replace(variables)
File ~/mambaforge/envs/dask-dev/lib/python3.11/site-packages/xarray/core/dataset.py:324, in _maybe_chunk(name, var, chunks, token, lock, name_prefix, overwrite_encoded_chunks, inline_array, chunked_array_type, from_array_kwargs)
315 name2 = f"{name_prefix}{name}-{token2}"
317 from_array_kwargs = utils.consolidate_dask_from_array_kwargs(
318 from_array_kwargs,
319 name=name2,
320 lock=lock,
321 inline_array=inline_array,
322 )
--> 324 var = var.chunk(
325 chunks,
326 chunked_array_type=chunked_array_type,
327 from_array_kwargs=from_array_kwargs,
328 )
330 if overwrite_encoded_chunks and var.chunks is not None:
331 var.encoding["chunks"] = tuple(x[0] for x in var.chunks)
File ~/mambaforge/envs/dask-dev/lib/python3.11/site-packages/xarray/core/variable.py:2599, in Variable.chunk(self, chunks, name, lock, inline_array, chunked_array_type, from_array_kwargs, **chunks_kwargs)
2591 # TODO deprecate passing these dask-specific arguments explicitly. In future just pass everything via from_array_kwargs
2592 _from_array_kwargs = consolidate_dask_from_array_kwargs(
2593 from_array_kwargs,
2594 name=name,
2595 lock=lock,
2596 inline_array=inline_array,
2597 )
-> 2599 return super().chunk(
2600 chunks=chunks,
2601 chunked_array_type=chunked_array_type,
2602 from_array_kwargs=_from_array_kwargs,
2603 **chunks_kwargs,
2604 )
File ~/mambaforge/envs/dask-dev/lib/python3.11/site-packages/xarray/namedarray/core.py:826, in NamedArray.chunk(self, chunks, chunked_array_type, from_array_kwargs, **chunks_kwargs)
824 data_old = self._data
825 if chunkmanager.is_chunked_array(data_old):
--> 826 data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type]
827 else:
828 if not isinstance(data_old, ExplicitlyIndexed):
File ~/mambaforge/envs/dask-dev/lib/python3.11/site-packages/xarray/namedarray/parallelcompat.py:337, in ChunkManagerEntrypoint.rechunk(self, data, chunks, **kwargs)
308 def rechunk(
309 self,
310 data: T_ChunkedArray,
311 chunks: _NormalizedChunks | tuple[int, ...] | _Chunks,
312 **kwargs: Any,
313 ) -> Any:
314 """
315 Changes the chunking pattern of the given array.
316
(...)
335 cubed.Array.rechunk
336 """
--> 337 return data.rechunk(chunks, **kwargs)
File ~/PycharmProjects/dask_dev/dask/dask/array/core.py:2763, in Array.rechunk(self, chunks, threshold, block_size_limit, balance, method)
2753 """Convert blocks in dask array x for new chunks.
2754
2755 Refer to :func:`dask.array.rechunk` for full documentation.
(...)
2759 dask.array.rechunk : equivalent function
2760 """
2761 from dask.array.rechunk import rechunk # avoid circular import
-> 2763 return rechunk(self, chunks, threshold, block_size_limit, balance, method)
File ~/PycharmProjects/dask_dev/dask/dask/array/rechunk.py:362, in rechunk(x, chunks, threshold, block_size_limit, balance, method)
360 print(x.chunks)
361 print(chunks)
--> 362 _validate_rechunk(x.chunks, chunks)
364 method = method or config.get("array.rechunk.method")
366 if method == "tasks":
File ~/PycharmProjects/dask_dev/dask/dask/array/rechunk.py:261, in _validate_rechunk(old_chunks, new_chunks)
257 if old_shape != new_shape:
258 if not (
259 math.isnan(old_shape) and math.isnan(new_shape)
260 ) or not np.array_equal(old_dim, new_dim, equal_nan=True):
--> 261 raise ValueError(
262 "Chunks must be unchanging along dimensions with missing values.\n\n"
263 "A possible solution:\n x.compute_chunk_sizes()"
264 )
ValueError: Chunks must be unchanging along dimensions with missing values.
A possible solution:
x.compute_chunk_sizes()
Anything else we need to know?
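xarray tries to change the time dimension chunks from
(216, 216, 216, 216, 216, 216, 144)
to
(248, 224, 248, nan, nan, nan, nan, nan, nan, nan, nan, nan, 248, 224, 248)
The nine nan entries correspond to the empty monthly bins (April-December 2001) for the months that were filtered out.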
Environment
INSTALLED VERSIONS
commit: None
python: 3.11.9 | packaged by conda-forge | (main, Apr 19 2024, 18:34:54) [Clang 16.0.6 ]
python-bits: 64
OS: Darwin
OS-release: 23.4.0
machine: arm64
processor: arm
byteorder: little
LC_ALL: None
LANG: None
LOCALE: (None, 'UTF-8')
libhdf5: 1.14.3
libnetcdf: None
xarray: 2024.7.0
pandas: 2.2.2
numpy: 1.26.4
scipy: 1.14.0
netCDF4: None
pydap: None
h5netcdf: None
h5py: 3.11.0
zarr: 2.18.2
cftime: None
nc_time_axis: None
iris: None
bottleneck: None
dask: 2024.8.0+14.g60f2c1a8e.dirty
distributed: 2024.8.0+6.gfd92ab83
matplotlib: 3.9.1
cartopy: None
seaborn: None
numbagg: None
fsspec: 2024.6.1
cupy: None
pint: None
sparse: 0.15.4
flox: 0.9.9
numpy_groupies: 0.11.2
setuptools: 71.0.4
pip: 24.0
conda: None
pytest: 8.3.1
mypy: None
IPython: 8.26.0
sphinx: None
None