Closed
Description
Describe the bug
I cannot save an EOF model with the newest version of the xeofs package.
import pooch
import xarray as xr
from xeofs.models import EOF

# Download the NOAA OISST v2 monthly-mean SST file into the current directory.
pooch.retrieve(
    url="https://downloads.psl.noaa.gov/Datasets/noaa.oisst.v2/sst.mnmean.nc",
    known_hash=None,
    path=".",
    fname="sst.mnmean.nc",
)

# Open the file with automatic chunking and keep the 1982-2022 period.
data_input = xr.open_dataset("sst.mnmean.nc", chunks="auto").sst.sel(
    time=slice("1982-01-01", "2022-12-31")
)

model = EOF(
    n_modes=10,
    # Must be the boolean False: the string 'False' is non-empty and therefore
    # truthy, which would switch standardization on instead of off.
    standardize=False,
    use_coslat=True,
)
model.fit(data_input, dim="time")

# This call raises the NotImplementedError shown in the traceback below.
model.save("test_save.zarr", engine="zarr")
Expected behavior
The model should be written to disk without error. Instead, the following exception is raised:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
Cell In[31], line 1
----> 1 model.save('test_save.zarr', engine = 'zarr')
File ~\anaconda3\envs\easytest\lib\site-packages\xeofs\models\_base_model.py:440, in _BaseModel.save(self, path, overwrite, save_data, engine, **kwargs)
437 if not save_data:
438 dt = insert_placeholders(dt)
--> 440 write_model_tree(dt, path, overwrite=overwrite, engine=engine, **kwargs)
File ~\anaconda3\envs\easytest\lib\site-packages\xeofs\utils\io.py:18, in write_model_tree(dt, path, overwrite, engine, **kwargs)
16 dt.to_netcdf(path, engine=engine, **kwargs)
17 elif engine == "zarr":
---> 18 dt.to_zarr(path, mode=write_mode, **kwargs)
19 else:
20 raise ValueError(f"Unknown engine {engine}")
File ~\anaconda3\envs\easytest\lib\site-packages\datatree\datatree.py:1526, in DataTree.to_zarr(self, store, mode, encoding, consolidated, **kwargs)
1501 """
1502 Write datatree contents to a Zarr store.
1503
(...)
1522 Additional keyword arguments to be passed to ``xarray.Dataset.to_zarr``
1523 """
1524 from .io import _datatree_to_zarr
-> 1526 _datatree_to_zarr(
1527 self,
1528 store,
1529 mode=mode,
1530 encoding=encoding,
1531 consolidated=consolidated,
1532 **kwargs,
1533 )
File ~\anaconda3\envs\easytest\lib\site-packages\datatree\io.py:211, in _datatree_to_zarr(dt, store, mode, encoding, consolidated, **kwargs)
209 _create_empty_zarr_group(store, group_path, mode)
210 else:
--> 211 ds.to_zarr(
212 store,
213 group=group_path,
214 mode=mode,
215 encoding=encoding.get(node.path),
216 consolidated=False,
217 **kwargs,
218 )
219 if "w" in mode:
220 mode = "a"
File ~\anaconda3\envs\easytest\lib\site-packages\xarray\core\dataset.py:2521, in Dataset.to_zarr(self, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, storage_options, zarr_version, write_empty_chunks, chunkmanager_store_kwargs)
2382 """Write dataset contents to a zarr group.
2383
2384 Zarr chunks are determined in the following way:
(...)
2517 The I/O user guide, with more details and examples.
2518 """
2519 from xarray.backends.api import to_zarr
-> 2521 return to_zarr( # type: ignore[call-overload,misc]
2522 self,
2523 store=store,
2524 chunk_store=chunk_store,
2525 storage_options=storage_options,
2526 mode=mode,
2527 synchronizer=synchronizer,
2528 group=group,
2529 encoding=encoding,
2530 compute=compute,
2531 consolidated=consolidated,
2532 append_dim=append_dim,
2533 region=region,
2534 safe_chunks=safe_chunks,
2535 zarr_version=zarr_version,
2536 write_empty_chunks=write_empty_chunks,
2537 chunkmanager_store_kwargs=chunkmanager_store_kwargs,
2538 )
File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\api.py:1832, in to_zarr(dataset, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, storage_options, zarr_version, write_empty_chunks, chunkmanager_store_kwargs)
1830 writer = ArrayWriter()
1831 # TODO: figure out how to properly handle unlimited_dims
-> 1832 dump_to_store(dataset, zstore, writer, encoding=encoding)
1833 writes = writer.sync(
1834 compute=compute, chunkmanager_store_kwargs=chunkmanager_store_kwargs
1835 )
1837 if compute:
File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\api.py:1362, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
1359 if encoder:
1360 variables, attrs = encoder(variables, attrs)
-> 1362 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\zarr.py:612, in ZarrStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
610 new_variables = set(variables) - existing_variable_names
611 variables_without_encoding = {vn: variables[vn] for vn in new_variables}
--> 612 variables_encoded, attributes = self.encode(
613 variables_without_encoding, attributes
614 )
616 if existing_variable_names:
617 # Decode variables directly, without going via xarray.Dataset to
618 # avoid needing to load index variables into memory.
619 # TODO: consider making loading indexes lazy again?
620 existing_vars, _, _ = conventions.decode_cf_variables(
621 self.get_variables(), self.get_attrs()
622 )
File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\common.py:291, in AbstractWritableDataStore.encode(self, variables, attributes)
274 def encode(self, variables, attributes):
275 """
276 Encode the variables and attributes in this store
277
(...)
289
290 """
--> 291 variables = {k: self.encode_variable(v) for k, v in variables.items()}
292 attributes = {k: self.encode_attribute(v) for k, v in attributes.items()}
293 return variables, attributes
File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\common.py:291, in <dictcomp>(.0)
274 def encode(self, variables, attributes):
275 """
276 Encode the variables and attributes in this store
277
(...)
289
290 """
--> 291 variables = {k: self.encode_variable(v) for k, v in variables.items()}
292 attributes = {k: self.encode_attribute(v) for k, v in attributes.items()}
293 return variables, attributes
File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\zarr.py:568, in ZarrStore.encode_variable(self, variable)
567 def encode_variable(self, variable):
--> 568 variable = encode_zarr_variable(variable)
569 return variable
File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\zarr.py:309, in encode_zarr_variable(var, needs_copy, name)
288 def encode_zarr_variable(var, needs_copy=True, name=None):
289 """
290 Converts an Variable into an Variable which follows some
291 of the CF conventions:
(...)
306 A variable which has been encoded as described above.
307 """
--> 309 var = conventions.encode_cf_variable(var, name=name)
311 # zarr allows unicode, but not variable-length strings, so it's both
312 # simpler and more compact to always encode as UTF-8 explicitly.
313 # TODO: allow toggling this explicitly via dtype in encoding.
314 coder = coding.strings.EncodedStringCoder(allows_unicode=True)
File ~\anaconda3\envs\easytest\lib\site-packages\xarray\conventions.py:179, in encode_cf_variable(var, needs_copy, name)
157 def encode_cf_variable(
158 var: Variable, needs_copy: bool = True, name: T_Name = None
159 ) -> Variable:
160 """
161 Converts a Variable into a Variable which follows some
162 of the CF conventions:
(...)
177 A variable which has been encoded as described above.
178 """
--> 179 ensure_not_multiindex(var, name=name)
181 for coder in [
182 times.CFDatetimeCoder(),
183 times.CFTimedeltaCoder(),
(...)
190 variables.BooleanCoder(),
191 ]:
192 var = coder.encode(var, name=name)
File ~\anaconda3\envs\easytest\lib\site-packages\xarray\conventions.py:88, in ensure_not_multiindex(var, name)
86 def ensure_not_multiindex(var: Variable, name: T_Name = None) -> None:
87 if isinstance(var._data, indexing.PandasMultiIndexingAdapter):
---> 88 raise NotImplementedError(
89 f"variable {name!r} is a MultiIndex, which cannot yet be "
90 "serialized. Instead, either use reset_index() "
91 "to convert MultiIndex levels into coordinate variables instead "
92 "or use https://cf-xarray.readthedocs.io/en/latest/coding.html."
93 )
NotImplementedError: variable None is a MultiIndex, which cannot yet be serialized. Instead, either use reset_index() to convert MultiIndex levels into coordinate variables instead or use https://cf-xarray.readthedocs.io/en/latest/coding.html.
P.S. Exporting to a netCDF file also raises an error:
# Same fitted `model` as in the reproduction script above, written with the netCDF backend.
model.save('test_save.nc', engine = 'netcdf4')
Desktop:
- OS: Windows 11
- xeofs version: 2.2.5
- xarray: 2024.1.1
- zarr: 2.16.1
- xarray-datatree: 0.0.13
- cf-xarray: 0.8.8
Additional context
It seems that the serialized model (i.e., `dt`, a `datatree.datatree.DataTree`) is not suitable for writing to a file, but I am not sure whether the problem lies in the `serialize` step of xeofs or in xarray-datatree. For reference, this is the `save` method in xeofs:
def save(
    self,
    path: str,
    overwrite: bool = False,
    save_data: bool = False,
    engine: Literal["zarr", "netcdf4", "h5netcdf"] = "zarr",
    **kwargs,
) -> None:
    """Save the model.

    The model is computed, serialized into a tree and written to ``path``
    with the selected backend.

    Parameters
    ----------
    path : str
        Path to save the model.
    overwrite : bool, default=False
        Whether or not to overwrite the existing path if it already exists.
        Ignored unless `engine="zarr"`.
    save_data : bool, default=False
        Whether or not to save the full input data along with the fitted
        components.
    engine : {"zarr", "netcdf4", "h5netcdf"}, default="zarr"
        Xarray backend engine to use for writing the saved model.
    **kwargs
        Additional keyword arguments to pass to `DataTree.to_netcdf()` or
        `DataTree.to_zarr()`.

    """
    # NOTE(review): presumably this evaluates any deferred (dask) results
    # before serialization -- confirm against `compute()`.
    self.compute()

    dt = self.serialize()

    # Remove any raw data arrays at this stage
    if not save_data:
        dt = insert_placeholders(dt)

    write_model_tree(dt, path, overwrite=overwrite, engine=engine, **kwargs)