Skip to content

Serialization fails with xarray>=2024.1.0 #148

Closed
@shenyulu

Description

@shenyulu

Describe the bug
I cannot save a fitted EOF model with the newest release of the xeofs package.

# Minimal reproduction: fit an EOF model on the NOAA OISST v2 monthly SST
# data set and try to save the fitted model.
import pooch
import xarray as xr  # the original snippet used `xr` without importing it

from xeofs.models import EOF

# Download the data set into the current working directory.
# known_hash=None skips checksum verification of the downloaded file.
pooch.retrieve(
    url="https://downloads.psl.noaa.gov/Datasets/noaa.oisst.v2/sst.mnmean.nc",
    known_hash=None,
    path=".",
    fname="sst.mnmean.nc",
)
data_input = xr.open_dataset("sst.mnmean.nc", chunks="auto").sst.sel(
    time=slice("1982-01-01", "2022-12-31")
)

model = EOF(
    n_modes=10,
    # Pass the boolean, not the string 'False': a non-empty string is truthy,
    # so it would not disable the option if it is evaluated as a flag.
    standardize=False,
    use_coslat=True,
)
model.fit(data_input, dim="time")

# This is the call that fails (see the traceback below).
model.save("test_save.zarr", engine="zarr")

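Expected behavior

The model should be written to disk without errors. Instead, the call raises the following exception: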

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
Cell In[31], line 1
----> 1 model.save('test_save.zarr', engine = 'zarr')

File ~\anaconda3\envs\easytest\lib\site-packages\xeofs\models\_base_model.py:440, in _BaseModel.save(self, path, overwrite, save_data, engine, **kwargs)
    437 if not save_data:
    438     dt = insert_placeholders(dt)
--> 440 write_model_tree(dt, path, overwrite=overwrite, engine=engine, **kwargs)

File ~\anaconda3\envs\easytest\lib\site-packages\xeofs\utils\io.py:18, in write_model_tree(dt, path, overwrite, engine, **kwargs)
     16     dt.to_netcdf(path, engine=engine, **kwargs)
     17 elif engine == "zarr":
---> 18     dt.to_zarr(path, mode=write_mode, **kwargs)
     19 else:
     20     raise ValueError(f"Unknown engine {engine}")

File ~\anaconda3\envs\easytest\lib\site-packages\datatree\datatree.py:1526, in DataTree.to_zarr(self, store, mode, encoding, consolidated, **kwargs)
   1501 """
   1502 Write datatree contents to a Zarr store.
   1503 
   (...)
   1522     Additional keyword arguments to be passed to ``xarray.Dataset.to_zarr``
   1523 """
   1524 from .io import _datatree_to_zarr
-> 1526 _datatree_to_zarr(
   1527     self,
   1528     store,
   1529     mode=mode,
   1530     encoding=encoding,
   1531     consolidated=consolidated,
   1532     **kwargs,
   1533 )

File ~\anaconda3\envs\easytest\lib\site-packages\datatree\io.py:211, in _datatree_to_zarr(dt, store, mode, encoding, consolidated, **kwargs)
    209     _create_empty_zarr_group(store, group_path, mode)
    210 else:
--> 211     ds.to_zarr(
    212         store,
    213         group=group_path,
    214         mode=mode,
    215         encoding=encoding.get(node.path),
    216         consolidated=False,
    217         **kwargs,
    218     )
    219 if "w" in mode:
    220     mode = "a"

File ~\anaconda3\envs\easytest\lib\site-packages\xarray\core\dataset.py:2521, in Dataset.to_zarr(self, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, storage_options, zarr_version, write_empty_chunks, chunkmanager_store_kwargs)
   2382 """Write dataset contents to a zarr group.
   2383 
   2384 Zarr chunks are determined in the following way:
   (...)
   2517     The I/O user guide, with more details and examples.
   2518 """
   2519 from xarray.backends.api import to_zarr
-> 2521 return to_zarr(  # type: ignore[call-overload,misc]
   2522     self,
   2523     store=store,
   2524     chunk_store=chunk_store,
   2525     storage_options=storage_options,
   2526     mode=mode,
   2527     synchronizer=synchronizer,
   2528     group=group,
   2529     encoding=encoding,
   2530     compute=compute,
   2531     consolidated=consolidated,
   2532     append_dim=append_dim,
   2533     region=region,
   2534     safe_chunks=safe_chunks,
   2535     zarr_version=zarr_version,
   2536     write_empty_chunks=write_empty_chunks,
   2537     chunkmanager_store_kwargs=chunkmanager_store_kwargs,
   2538 )

File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\api.py:1832, in to_zarr(dataset, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region, safe_chunks, storage_options, zarr_version, write_empty_chunks, chunkmanager_store_kwargs)
   1830 writer = ArrayWriter()
   1831 # TODO: figure out how to properly handle unlimited_dims
-> 1832 dump_to_store(dataset, zstore, writer, encoding=encoding)
   1833 writes = writer.sync(
   1834     compute=compute, chunkmanager_store_kwargs=chunkmanager_store_kwargs
   1835 )
   1837 if compute:

File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\api.py:1362, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
   1359 if encoder:
   1360     variables, attrs = encoder(variables, attrs)
-> 1362 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)

File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\zarr.py:612, in ZarrStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
    610 new_variables = set(variables) - existing_variable_names
    611 variables_without_encoding = {vn: variables[vn] for vn in new_variables}
--> 612 variables_encoded, attributes = self.encode(
    613     variables_without_encoding, attributes
    614 )
    616 if existing_variable_names:
    617     # Decode variables directly, without going via xarray.Dataset to
    618     # avoid needing to load index variables into memory.
    619     # TODO: consider making loading indexes lazy again?
    620     existing_vars, _, _ = conventions.decode_cf_variables(
    621         self.get_variables(), self.get_attrs()
    622     )

File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\common.py:291, in AbstractWritableDataStore.encode(self, variables, attributes)
    274 def encode(self, variables, attributes):
    275     """
    276     Encode the variables and attributes in this store
    277 
   (...)
    289 
    290     """
--> 291     variables = {k: self.encode_variable(v) for k, v in variables.items()}
    292     attributes = {k: self.encode_attribute(v) for k, v in attributes.items()}
    293     return variables, attributes

File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\common.py:291, in <dictcomp>(.0)
    274 def encode(self, variables, attributes):
    275     """
    276     Encode the variables and attributes in this store
    277 
   (...)
    289 
    290     """
--> 291     variables = {k: self.encode_variable(v) for k, v in variables.items()}
    292     attributes = {k: self.encode_attribute(v) for k, v in attributes.items()}
    293     return variables, attributes

File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\zarr.py:568, in ZarrStore.encode_variable(self, variable)
    567 def encode_variable(self, variable):
--> 568     variable = encode_zarr_variable(variable)
    569     return variable

File ~\anaconda3\envs\easytest\lib\site-packages\xarray\backends\zarr.py:309, in encode_zarr_variable(var, needs_copy, name)
    288 def encode_zarr_variable(var, needs_copy=True, name=None):
    289     """
    290     Converts an Variable into an Variable which follows some
    291     of the CF conventions:
   (...)
    306         A variable which has been encoded as described above.
    307     """
--> 309     var = conventions.encode_cf_variable(var, name=name)
    311     # zarr allows unicode, but not variable-length strings, so it's both
    312     # simpler and more compact to always encode as UTF-8 explicitly.
    313     # TODO: allow toggling this explicitly via dtype in encoding.
    314     coder = coding.strings.EncodedStringCoder(allows_unicode=True)

File ~\anaconda3\envs\easytest\lib\site-packages\xarray\conventions.py:179, in encode_cf_variable(var, needs_copy, name)
    157 def encode_cf_variable(
    158     var: Variable, needs_copy: bool = True, name: T_Name = None
    159 ) -> Variable:
    160     """
    161     Converts a Variable into a Variable which follows some
    162     of the CF conventions:
   (...)
    177         A variable which has been encoded as described above.
    178     """
--> 179     ensure_not_multiindex(var, name=name)
    181     for coder in [
    182         times.CFDatetimeCoder(),
    183         times.CFTimedeltaCoder(),
   (...)
    190         variables.BooleanCoder(),
    191     ]:
    192         var = coder.encode(var, name=name)

File ~\anaconda3\envs\easytest\lib\site-packages\xarray\conventions.py:88, in ensure_not_multiindex(var, name)
     86 def ensure_not_multiindex(var: Variable, name: T_Name = None) -> None:
     87     if isinstance(var._data, indexing.PandasMultiIndexingAdapter):
---> 88         raise NotImplementedError(
     89             f"variable {name!r} is a MultiIndex, which cannot yet be "
     90             "serialized. Instead, either use reset_index() "
     91             "to convert MultiIndex levels into coordinate variables instead "
     92             "or use https://cf-xarray.readthedocs.io/en/latest/coding.html."
     93         )

NotImplementedError: variable None is a MultiIndex, which cannot yet be serialized. Instead, either use reset_index() to convert MultiIndex levels into coordinate variables instead or use https://cf-xarray.readthedocs.io/en/latest/coding.html.

P.S. Exporting to a netCDF file also raises an error.

model.save('test_save.nc', engine = 'netcdf4')

Desktop:

  • OS: Windows 11
  • xeofs version 2.2.5
  • xarray: 2024.1.1
  • zarr: 2.16.1
  • xarray-datatree: 0.0.13
  • cf-xarray: 0.8.8

Additional context

It seems that the object returned by `serialize()` (i.e., `dt`, a `datatree.datatree.DataTree`) is not suitable for writing to a file, but I am not sure whether the problem lies in xeofs' `serialize` or in xarray-datatree.

    def save(
        self,
        path: str,
        overwrite: bool = False,
        save_data: bool = False,
        engine: Literal["zarr", "netcdf4", "h5netcdf"] = "zarr",
        **kwargs,
    ):
        """Serialize the model and write it to ``path``.

        Parameters
        ----------
        path : str
            Destination the model is written to.
        overwrite : bool, default=False
            Whether an already existing ``path`` may be overwritten.
            Only takes effect when ``engine="zarr"``.
        save_data : bool, default=False
            If ``True``, the serialized model is written unchanged. If
            ``False``, it is first passed through ``insert_placeholders`` so
            that raw data arrays are not stored.
        engine : {"zarr", "netcdf4", "h5netcdf"}, default="zarr"
            Xarray backend engine used for writing.
        **kwargs
            Extra keyword arguments forwarded to the writer, i.e. to
            ``DataTree.to_netcdf()`` or ``DataTree.to_zarr()``.

        """
        # NOTE(review): presumably evaluates any deferred computations before
        # the model is serialized -- confirm against ``compute``.
        self.compute()

        model_tree = self.serialize()

        # Keep the full tree only when the caller asked for the raw data.
        if save_data:
            tree_to_write = model_tree
        else:
            tree_to_write = insert_placeholders(model_tree)

        write_model_tree(
            tree_to_write,
            path,
            overwrite=overwrite,
            engine=engine,
            **kwargs,
        )

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions