
RuntimeError when writing to a file with to_netcdf #2079

@jwnki

Description

Describe the bug

Trying to write an InferenceData object to a file using to_netcdf results in an error:
RuntimeError: NetCDF: Filter error: bad id or parameters or duplicate filter

To Reproduce

import arviz as az
data = az.load_arviz_data("centered_eight")
data.to_netcdf("test.nc")
Stacktrace
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Input In [5], in <cell line: 1>()
----> 1 data.to_netcdf("test.nc")

File ~/.local/lib/python3.9/site-packages/arviz/data/inference_data.py:427, in InferenceData.to_netcdf(self, filename, compress, groups)
    425 if compress:
    426     kwargs["encoding"] = {var_name: {"zlib": True} for var_name in data.variables}
--> 427 data.to_netcdf(filename, mode=mode, group=group, **kwargs)
    428 data.close()
    429 mode = "a"

File ~/.local/lib/python3.9/site-packages/xarray/core/dataset.py:1901, in Dataset.to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
   1898     encoding = {}
   1899 from ..backends.api import to_netcdf
-> 1901 return to_netcdf(
   1902     self,
   1903     path,
   1904     mode,
   1905     format=format,
   1906     group=group,
   1907     engine=engine,
   1908     encoding=encoding,
   1909     unlimited_dims=unlimited_dims,
   1910     compute=compute,
   1911     invalid_netcdf=invalid_netcdf,
   1912 )

File ~/.local/lib/python3.9/site-packages/xarray/backends/api.py:1072, in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
   1067 # TODO: figure out how to refactor this logic (here and in save_mfdataset)
   1068 # to avoid this mess of conditionals
   1069 try:
   1070     # TODO: allow this work (setting up the file for writing array data)
   1071     # to be parallelized with dask
-> 1072     dump_to_store(
   1073         dataset, store, writer, encoding=encoding, unlimited_dims=unlimited_dims
   1074     )
   1075     if autoclose:
   1076         store.close()

File ~/.local/lib/python3.9/site-packages/xarray/backends/api.py:1119, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
   1116 if encoder:
   1117     variables, attrs = encoder(variables, attrs)
-> 1119 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)

File ~/.local/lib/python3.9/site-packages/xarray/backends/common.py:265, in AbstractWritableDataStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
    263 self.set_attributes(attributes)
    264 self.set_dimensions(variables, unlimited_dims=unlimited_dims)
--> 265 self.set_variables(
    266     variables, check_encoding_set, writer, unlimited_dims=unlimited_dims
    267 )

File ~/.local/lib/python3.9/site-packages/xarray/backends/common.py:303, in AbstractWritableDataStore.set_variables(self, variables, check_encoding_set, writer, unlimited_dims)
    301 name = _encode_variable_name(vn)
    302 check = vn in check_encoding_set
--> 303 target, source = self.prepare_variable(
    304     name, v, check, unlimited_dims=unlimited_dims
    305 )
    307 writer.add(source, target)

File ~/.local/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:488, in NetCDF4DataStore.prepare_variable(self, name, variable, check_encoding, unlimited_dims)
    486     nc4_var = self.ds.variables[name]
    487 else:
--> 488     nc4_var = self.ds.createVariable(
    489         varname=name,
    490         datatype=datatype,
    491         dimensions=variable.dims,
    492         zlib=encoding.get("zlib", False),
    493         complevel=encoding.get("complevel", 4),
    494         shuffle=encoding.get("shuffle", True),
    495         fletcher32=encoding.get("fletcher32", False),
    496         contiguous=encoding.get("contiguous", False),
    497         chunksizes=encoding.get("chunksizes"),
    498         endian="native",
    499         least_significant_digit=encoding.get("least_significant_digit"),
    500         fill_value=fill_value,
    501     )
    503 nc4_var.setncatts(attrs)
    505 target = NetCDF4ArrayWrapper(name, self)

File src/netCDF4/_netCDF4.pyx:2838, in netCDF4._netCDF4.Dataset.createVariable()

File src/netCDF4/_netCDF4.pyx:4003, in netCDF4._netCDF4.Variable.__init__()

File src/netCDF4/_netCDF4.pyx:1965, in netCDF4._netCDF4._ensure_nc_success()

RuntimeError: NetCDF: Filter error: bad id or parameters or duplicate filter
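The frames above show where this goes wrong: inference_data.py:425-426 sets {"zlib": True} in the encoding whenever compress is truthy, and that encoding eventually reaches netCDF4's createVariable filter setup, where the RuntimeError is raised. As a possible workaround (a minimal sketch; that compress=False actually avoids the error on netCDF4 1.6.0 is an assumption, not something verified here):

import arviz as az

data = az.load_arviz_data("centered_eight")
# compress defaults to True; per inference_data.py:425-426 in the traceback,
# that is what injects {"zlib": True} into the encoding passed down to
# netCDF4.createVariable. Passing compress=False skips that branch, at the
# cost of writing an uncompressed file.
data.to_netcdf("test.nc", compress=False)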

Expected behavior
The dataset is written to the file without error.

Additional context
arviz version: 0.12.1
xarray version: 2022.3.0
netCDF4 version: 1.6.0

This was on a computer cluster running Debian GNU/Linux 10 (buster).
The file is actually created, but reading it back shows that it is corrupted:

In [2]: aa = az.from_netcdf("test.nc")

In [3]: aa
Out[3]: 
Inference data with groups:
	> 
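To confirm that the groups really were not written (rather than arviz failing to read them), the file can be inspected directly with netCDF4. A minimal check, assuming the "test.nc" produced by the failing call above:

import netCDF4

# Each InferenceData group (posterior, sample_stats, observed_data, ...)
# is stored as a netCDF group; an empty mapping here means nothing survived.
with netCDF4.Dataset("test.nc") as nc:
    print(nc.groups)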
