Allow setting (or skipping) new indexes in open_dataset #8051
976 fail, 1 889 skipped, 16 974 pass in 2h 16m 33s
Annotations
12 out of 13 runs failed: test_default_fill_value (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fe00710>
def test_default_fill_value(self) -> None:
# Test default encoding for float:
ds = Dataset({"x": ("y", np.arange(10.0))})
kwargs = dict(encoding={"x": {"dtype": "f4"}})
> with self.roundtrip(ds, save_kwargs=kwargs) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1159:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpvlluzcx4/temp-0.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value as decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open, given as
a str; supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
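All of the failures in this check share the same root cause: ``open_dataset()`` now forwards the new ``set_indexes`` keyword to ``backend.open_dataset()``, but ``H5netcdfBackendEntrypoint.open_dataset()`` does not declare that parameter. Below is a minimal sketch of a backend entrypoint that does accept the keyword; the class, file name, variable names and data are hypothetical, and the ``Coordinates(..., indexes={})`` pattern assumes a recent xarray with the explicit-indexes API. It is an illustration of the failure mode, not the PR's actual implementation.

    import numpy as np
    import xarray as xr
    from xarray.backends import BackendEntrypoint

    class ToyBackendEntrypoint(BackendEntrypoint):
        """Toy backend that ignores the file contents and returns a fixed dataset."""

        def open_dataset(
            self,
            filename_or_obj,
            *,
            drop_variables=None,
            set_indexes=True,  # declaring this keyword is what the h5netcdf entrypoint lacks above
            **kwargs,
        ) -> xr.Dataset:
            data = xr.Variable("y", np.arange(3.0))
            coord = xr.Variable("y", np.array([0.0, 1.0, 2.0]))
            if set_indexes:
                # default behaviour: a pandas index is created for the dimension coordinate "y"
                return xr.Dataset({"x": data}, coords={"y": coord})
            # skip index creation by passing Coordinates with an explicitly empty index mapping
            coords = xr.Coordinates({"y": coord}, indexes={})
            return xr.Dataset({"x": data}, coords=coords)

    # usage sketch (requires this PR, since set_indexes is new):
    # xr.open_dataset("ignored", engine=ToyBackendEntrypoint, set_indexes=False)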
12 out of 13 runs failed: test_explicitly_omit_fill_value (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fe00920>
def test_explicitly_omit_fill_value(self) -> None:
ds = Dataset({"x": ("y", [np.pi, -np.pi])})
ds.x.encoding["_FillValue"] = None
> with self.roundtrip(ds) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1181:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp0d2k8f3x/temp-1.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
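For reference, the user-facing behaviour described by the ``set_indexes`` docstring reproduced above would look roughly like this. A hedged sketch: the file name and the "y" coordinate are placeholders, and the keyword only exists on this PR's branch.

    import xarray as xr

    ds = xr.open_dataset("data.nc", set_indexes=False)  # no default pandas indexes are created
    print(list(ds.xindexes))                            # [] -- coordinate data is not loaded into an index

    ds = ds.set_xindex("y")                             # build an index for the "y" coordinate only when needed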
12 out of 13 runs failed: test_explicitly_omit_fill_value_via_encoding_kwarg (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fe00bc0>
def test_explicitly_omit_fill_value_via_encoding_kwarg(self) -> None:
ds = Dataset({"x": ("y", [np.pi, -np.pi])})
kwargs = dict(encoding={"x": {"_FillValue": None}})
> with self.roundtrip(ds, save_kwargs=kwargs) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1187:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp8lu8zfle/temp-2.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
12 out of 13 runs failed: test_explicitly_omit_fill_value_in_coord (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fe00e00>
def test_explicitly_omit_fill_value_in_coord(self) -> None:
ds = Dataset({"x": ("y", [np.pi, -np.pi])}, coords={"y": [0.0, 1.0]})
ds.y.encoding["_FillValue"] = None
> with self.roundtrip(ds) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1194:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpy51tn8hr/temp-3.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
12 out of 13 runs failed: test_explicitly_omit_fill_value_in_coord_via_encoding_kwarg (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fe01100>
def test_explicitly_omit_fill_value_in_coord_via_encoding_kwarg(self) -> None:
ds = Dataset({"x": ("y", [np.pi, -np.pi])}, coords={"y": [0.0, 1.0]})
kwargs = dict(encoding={"y": {"_FillValue": None}})
> with self.roundtrip(ds, save_kwargs=kwargs) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1200:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpy27qqdmm/temp-4.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
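One conceivable way to keep backends that have not yet added the keyword from failing like this is to forward ``set_indexes`` only when the entrypoint declares it. The sketch below is illustrative, not necessarily what the PR implements; ``_maybe_forward_set_indexes`` is a hypothetical helper.

    import inspect

    def _maybe_forward_set_indexes(backend_open_dataset, set_indexes):
        """Return {'set_indexes': ...} only when the backend's open_dataset accepts that keyword."""
        params = inspect.signature(backend_open_dataset).parameters
        if "set_indexes" in params:
            return {"set_indexes": set_indexes}
        if not set_indexes:
            raise TypeError("this backend does not support set_indexes=False")
        return {}

    # inside xarray.backends.api.open_dataset (sketch):
    # extra = _maybe_forward_set_indexes(backend.open_dataset, set_indexes)
    # backend_ds = backend.open_dataset(
    #     filename_or_obj, drop_variables=drop_variables, **extra, **decoders, **kwargs
    # )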
12 out of 13 runs failed: test_encoding_same_dtype (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fe013a0>
def test_encoding_same_dtype(self) -> None:
ds = Dataset({"x": ("y", np.arange(10.0, dtype="f4"))})
kwargs = dict(encoding={"x": {"dtype": "f4"}})
> with self.roundtrip(ds, save_kwargs=kwargs) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1207:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpzzsnxa6t/temp-5.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open, given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
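The NetCDF4BackendEntrypoint failure above is the same mismatch as the h5netcdf ones: api.py passes set_indexes to a backend that does not declare it. A defensive alternative (a sketch only, not the fix actually made in this PR) would be to forward the keyword only when the backend's open_dataset signature can take it:

import inspect

def forward_set_indexes(backend, set_indexes, kwargs):
    """Add set_indexes to kwargs only if backend.open_dataset can accept it."""
    params = inspect.signature(backend.open_dataset).parameters
    accepts = "set_indexes" in params or any(
        p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()
    )
    if accepts:
        return {**kwargs, "set_indexes": set_indexes}
    if not set_indexes:
        # The caller explicitly asked to skip index creation but this backend
        # cannot honour it; raising is clearer than silently ignoring the flag.
        raise TypeError(
            f"{type(backend).__name__}.open_dataset() does not support set_indexes"
        )
    return kwargs

Either way, the keyword has to stop reaching entrypoints that were written before it existed.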
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
9 out of 13 runs failed: test_roundtrip_example_1_netcdf (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7f3837181e20>
def test_roundtrip_example_1_netcdf(self) -> None:
> with open_example_dataset("example_1.nc") as expected:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:578:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:132: in open_example_dataset
return open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/home/runner/work/xarray/xarray/xarray/tests/data/example_1.nc'
engine = 'netcdf4', chunks = None, cache = True, decode_cf = None
mask_and_scale = None, decode_times = None, decode_timedelta = None
use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open, given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
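For reference, this is how the new keyword documented above is meant to be used once the backends accept it (a usage sketch; "example_1.nc" is just a placeholder file name):

import xarray as xr

ds = xr.open_dataset("example_1.nc", set_indexes=False)
print(ds.xindexes)        # expected: no default indexes were created
ds_default = xr.open_dataset("example_1.nc")   # set_indexes=True is the default
print(list(ds_default.xindexes))  # one pandas index per dimension coordinate

As this run shows, api.py currently forwards the keyword for every call, so even plain open_dataset() calls fail until the entrypoints are updated.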
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_append_write (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fe016d0>
def test_append_write(self) -> None:
# regression for GH1215
data = create_test_data()
> with self.roundtrip_append(data) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1216:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:327: in roundtrip_append
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpl_4q3l0g/temp-6.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open, given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
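As a side note on the docstring repeated above, the chunks parameter maps onto dask like this (an illustration with placeholder paths; the chunked variants require dask to be installed):

import xarray as xr

ds_numpy = xr.open_dataset("data.nc")                     # eager NumPy arrays
ds_single = xr.open_dataset("data.nc", chunks=-1)         # one dask chunk per array
ds_preferred = xr.open_dataset("data.nc", chunks={})      # backend-preferred chunks
ds_auto = xr.open_dataset("data.nc", chunks="auto")       # dask "auto" chunking
ds_zarr = xr.open_dataset("store.zarr", engine="zarr", chunks={})  # matches open_zarr defaults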
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_append_overwrite_values (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fe018e0>
def test_append_overwrite_values(self) -> None:
# regression for GH1215
data = create_test_data()
with create_tmp_file(allow_cleanup_failure=False) as tmp_file:
self.save(data, tmp_file, mode="w")
data["var2"][:] = -999
data["var9"] = data["var2"] * 3
self.save(data[["var2", "var9"]], tmp_file, mode="a")
> with self.open(tmp_file) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1227:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmplq_aspux/temp-7.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open, given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
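The engine-specific keywords mentioned in the docstring (group, lock, ...) can be given either directly or through backend_kwargs; the two forms are equivalent (placeholder file and group names below):

import xarray as xr

ds_group = xr.open_dataset("nested.nc", engine="netcdf4", group="model/output")
ds_same = xr.open_dataset(
    "nested.nc",
    engine="netcdf4",
    backend_kwargs={"group": "model/output"},
)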
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
9 out of 13 runs failed: test_grid_mapping_and_bounds_are_not_coordinates_in_file (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7f3837178c20>
def test_grid_mapping_and_bounds_are_not_coordinates_in_file(self) -> None:
original = self._create_cf_dataset()
with create_tmp_file() as tmp_file:
original.to_netcdf(tmp_file)
> with open_dataset(tmp_file, decode_coords=False) as ds:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:980:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp295mijoi/temp-108.nc', engine = 'netcdf4'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open, given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
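The test above exercises decode_coords; for reference, these are the documented modes (a sketch with a placeholder CF file that carries coordinates, grid_mapping and bounds attributes):

import xarray as xr

raw = xr.open_dataset("cf_file.nc", decode_coords=False)      # nothing promoted to coordinates
default = xr.open_dataset("cf_file.nc")                        # follows the 'coordinates' attribute
full = xr.open_dataset("cf_file.nc", decode_coords="all")      # also grid_mapping, bounds, etc.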
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_multiindex_not_implemented (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fe01d60>
def test_multiindex_not_implemented(self) -> None:
ds = Dataset(coords={"y": ("x", [1, 2]), "z": ("x", ["a", "b"])}).set_index(
x=["y", "z"]
)
with pytest.raises(NotImplementedError, match=r"MultiIndex"):
with self.roundtrip(ds):
pass
# regression GH8628 (can serialize reset multi-index level coordinates)
ds_reset = ds.reset_index("x")
> with self.roundtrip(ds_reset) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1251:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmphzkt6zxk/temp-10.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_time.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
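Every failure in this run reduces to the same root cause: ``open_dataset`` in xarray/backends/api.py now forwards the new ``set_indexes`` keyword to ``backend.open_dataset()`` unconditionally, while the built-in backend entrypoints (``H5netcdfBackendEntrypoint``, ``NetCDF4BackendEntrypoint``) do not yet accept it. A minimal, self-contained sketch of that failure mode and of one possible shape of a fix follows; all class names and the returned dict are hypothetical stand-ins, not the actual PR diff or xarray's real entrypoints.
# Hypothetical sketch -- illustrates the failure mode above, not xarray's real entrypoints.
class OldEntrypoint:
    # Mirrors the current built-in entrypoints: no set_indexes parameter.
    def open_dataset(self, filename_or_obj, *, drop_variables=None, **decoders):
        return {"file": filename_or_obj, "drop_variables": drop_variables, **decoders}

class PatchedEntrypoint(OldEntrypoint):
    # One possible fix: accept the new keyword (with a default) so the dispatcher
    # in backends/api.py can forward set_indexes unconditionally.
    def open_dataset(self, filename_or_obj, *, drop_variables=None, set_indexes=True, **decoders):
        ds = super().open_dataset(filename_or_obj, drop_variables=drop_variables, **decoders)
        ds["set_indexes"] = set_indexes
        return ds

try:
    OldEntrypoint().open_dataset("example.nc", set_indexes=False)
except TypeError as err:
    print(err)  # "... got an unexpected keyword argument 'set_indexes'", as in the logs above

print(PatchedEntrypoint().open_dataset("example.nc", set_indexes=False))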
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
9 out of 13 runs failed: test_refresh_from_disk (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fdd31d0>
@pytest.mark.skipif(
ON_WINDOWS, reason="Windows does not allow modifying open files"
)
def test_refresh_from_disk(self) -> None:
# regression test for https://github.com/pydata/xarray/issues/4862
with create_tmp_file() as example_1_path:
with create_tmp_file() as example_1_modified_path:
> with open_example_dataset("example_1.nc") as example_1:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1266:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:132: in open_example_dataset
return open_dataset(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/home/runner/work/xarray/xarray/xarray/tests/data/example_1.nc'
engine = 'netcdf4', chunks = None, cache = True, decode_cf = None
mask_and_scale = None, decode_times = None, decode_timedelta = None
use_cftime = None
def open_dataset(...)  # signature, docstring, and body identical to the first failure above (elided)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
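For context, the behaviour the new keyword is documented to control (per the ``set_indexes`` docstring entry above) would look roughly like this from the user side once the backends accept it; ``"example.nc"`` is a placeholder path, not a file from this test run.
import xarray as xr

# Default behaviour: a pandas index is created for each dimension coordinate,
# which loads that coordinate's data into memory.
ds_indexed = xr.open_dataset("example.nc", engine="h5netcdf")

# Skip index creation, per the set_indexes description above, so coordinate
# data is not eagerly loaded into pandas indexes.
ds_no_indexes = xr.open_dataset("example.nc", engine="h5netcdf", set_indexes=False)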
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_open_group (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d70347f50>
def test_open_group(self) -> None:
# Create a netCDF file with a dataset stored within a group
with create_tmp_file() as tmp_file:
with nc4.Dataset(tmp_file, "w") as rootgrp:
foogrp = rootgrp.createGroup("foo")
ds = foogrp
ds.createDimension("time", size=10)
x = np.arange(10)
ds.createVariable("x", np.int32, dimensions=("time",))
ds.variables["x"][:] = x
expected = Dataset()
expected["x"] = ("time", x)
# check equivalent ways to specify group
for group in "foo", "/foo", "foo/", "/foo/":
> with self.open(tmp_file, group=group) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1339:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmphu2wvjo8/temp-13.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(...)  # signature, docstring, and body identical to the first failure above (elided)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_open_subgroup (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d70347d10>
def test_open_subgroup(self) -> None:
# Create a netCDF file with a dataset stored within a group within a
# group
with create_tmp_file() as tmp_file:
rootgrp = nc4.Dataset(tmp_file, "w")
foogrp = rootgrp.createGroup("foo")
bargrp = foogrp.createGroup("bar")
ds = bargrp
ds.createDimension("time", size=10)
x = np.arange(10)
ds.createVariable("x", np.int32, dimensions=("time",))
ds.variables["x"][:] = x
rootgrp.close()
expected = Dataset()
expected["x"] = ("time", x)
# check equivalent ways to specify group
for group in "foo/bar", "/foo/bar", "foo/bar/", "/foo/bar/":
> with self.open(tmp_file, group=group) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1367:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp0ahm1m0z/temp-14.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(...)  # signature, docstring, and body identical to the first failure above (elided)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_write_groups (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fdd00b0>
def test_write_groups(self) -> None:
data1 = create_test_data()
data2 = data1 * 2
with create_tmp_file() as tmp_file:
self.save(data1, tmp_file, group="data/1")
self.save(data2, tmp_file, group="data/2", mode="a")
> with self.open(tmp_file, group="data/1") as actual1:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1376:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp4x59k76n/temp-15.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(...)  # signature, docstring, and body identical to the first failure above (elided)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
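The traceback also shows that ``_resolve_decoders_kwargs`` is passed ``backend.open_dataset_parameters`` before decoder keywords are forwarded. A sketch of the same idea applied to ``set_indexes`` (purely illustrative, using ``inspect`` rather than xarray internals, and not necessarily what this PR does) would be to forward the keyword only when the entrypoint declares it:
import inspect

def maybe_forward_set_indexes(backend_open_dataset, set_indexes):
    # Only pass set_indexes to entrypoints whose signature accepts it,
    # analogous to how open_dataset_parameters gates the decoder keywords.
    params = inspect.signature(backend_open_dataset).parameters
    if "set_indexes" in params or any(
        p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()
    ):
        return {"set_indexes": set_indexes}
    return {}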
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_encoding_kwarg_vlen_string[input_strings0-True] (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fdd04a0>
input_strings = [b'foo', b'bar', b'baz'], is_bytes = True
@pytest.mark.parametrize(
"input_strings, is_bytes",
[
([b"foo", b"bar", b"baz"], True),
(["foo", "bar", "baz"], False),
(["foó", "bár", "baź"], False),
],
)
def test_encoding_kwarg_vlen_string(
self, input_strings: list[str], is_bytes: bool
) -> None:
original = Dataset({"x": input_strings})
expected_string = ["foo", "bar", "baz"] if is_bytes else input_strings
expected = Dataset({"x": expected_string})
kwargs = dict(encoding={"x": {"dtype": str}})
> with self.roundtrip(original, save_kwargs=kwargs) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1397:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpns1q4x7e/temp-16.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
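For orientation, a minimal sketch of the failure mode shared by these annotations, assuming this PR's branch of xarray and a tiny temporary netCDF file (path and data are illustrative): open_dataset now takes set_indexes and always forwards it to the backend entrypoint, so any entrypoint whose open_dataset() signature was not updated rejects the keyword even when the caller never passes it.

import numpy as np
import xarray as xr

# Write a small file with the h5netcdf engine (illustrative data and path).
ds = xr.Dataset({"x": ("y", np.arange(10.0))})
ds.to_netcdf("temp.nc", engine="h5netcdf")

# set_indexes defaults to True and is forwarded unconditionally, so the
# unmodified H5netcdfBackendEntrypoint raises the TypeError captured above.
try:
    xr.open_dataset("temp.nc", engine="h5netcdf")
except TypeError as err:
    print(err)  # ... got an unexpected keyword argument 'set_indexes'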
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_encoding_kwarg_vlen_string[input_strings1-False] (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fdd06b0>
input_strings = ['foo', 'bar', 'baz'], is_bytes = False
@pytest.mark.parametrize(
"input_strings, is_bytes",
[
([b"foo", b"bar", b"baz"], True),
(["foo", "bar", "baz"], False),
(["foó", "bár", "baź"], False),
],
)
def test_encoding_kwarg_vlen_string(
self, input_strings: list[str], is_bytes: bool
) -> None:
original = Dataset({"x": input_strings})
expected_string = ["foo", "bar", "baz"] if is_bytes else input_strings
expected = Dataset({"x": expected_string})
kwargs = dict(encoding={"x": {"dtype": str}})
> with self.roundtrip(original, save_kwargs=kwargs) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1397:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp70k4v90g/temp-17.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
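As a rough sketch of what an entrypoint needs so that the forwarded keyword is accepted (a generic illustration, not the PR's actual h5netcdf change; the class name and body are made up):

from xarray.backends import BackendEntrypoint

class ExampleBackendEntrypoint(BackendEntrypoint):
    # The keyword forwarded by open_dataset must either be declared
    # explicitly or be absorbed by **kwargs for the dispatch above to work.
    def open_dataset(
        self,
        filename_or_obj,
        *,
        drop_variables=None,
        set_indexes=True,
        **kwargs,
    ):
        # A real backend would build and return an xarray.Dataset here,
        # creating coordinate indexes only when set_indexes is True.
        raise NotImplementedError("illustrative sketch only")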
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_encoding_kwarg_vlen_string[input_strings2-False] (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fdd0800>
input_strings = ['foó', 'bár', 'baź'], is_bytes = False
@pytest.mark.parametrize(
"input_strings, is_bytes",
[
([b"foo", b"bar", b"baz"], True),
(["foo", "bar", "baz"], False),
(["foó", "bár", "baź"], False),
],
)
def test_encoding_kwarg_vlen_string(
self, input_strings: list[str], is_bytes: bool
) -> None:
original = Dataset({"x": input_strings})
expected_string = ["foo", "bar", "baz"] if is_bytes else input_strings
expected = Dataset({"x": expected_string})
kwargs = dict(encoding={"x": {"dtype": str}})
> with self.roundtrip(original, save_kwargs=kwargs) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1397:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpkl1wcpih/temp-18.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
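Once the entrypoints accept the keyword, the behaviour described by the set_indexes docstring entry would look roughly like this (a usage sketch; "store.nc" is an illustrative path, and the empty-index outcome is what the docstring promises rather than something verified here):

import xarray as xr

ds = xr.open_dataset("store.nc", set_indexes=False)
# Per the docstring above, no pandas indexes are created for the dimension
# coordinates, so their data is not eagerly loaded into memory.
print(ds.indexes)   # expected to be empty
print(ds.xindexes)  # likewise empty until indexes are created explicitly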
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_roundtrip_string_with_fill_value_vlen[XXX] (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fdd05f0>
fill_value = 'XXX'
@pytest.mark.parametrize("fill_value", ["XXX", "", "bár"])
def test_roundtrip_string_with_fill_value_vlen(self, fill_value: str) -> None:
values = np.array(["ab", "cdef", np.nan], dtype=object)
expected = Dataset({"x": ("t", values)})
original = Dataset({"x": ("t", values, {}, {"_FillValue": fill_value})})
> with self.roundtrip(original) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1408:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpn8obtjw1/temp-19.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
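One hypothetical way to make the dispatch site above tolerant of backends that have not been updated (purely illustrative, not the approach taken in this PR) is to forward set_indexes only when the entrypoint's open_dataset() declares it:

import inspect

def open_with_optional_set_indexes(backend, filename_or_obj, *, drop_variables,
                                   set_indexes, decoders, kwargs):
    # Hypothetical guard: check the backend signature before forwarding.
    extra = {}
    if "set_indexes" in inspect.signature(backend.open_dataset).parameters:
        extra["set_indexes"] = set_indexes
    return backend.open_dataset(
        filename_or_obj,
        drop_variables=drop_variables,
        **extra,
        **decoders,
        **kwargs,
    )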
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_roundtrip_string_with_fill_value_vlen[] (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fdd0b00>
fill_value = ''
@pytest.mark.parametrize("fill_value", ["XXX", "", "bár"])
def test_roundtrip_string_with_fill_value_vlen(self, fill_value: str) -> None:
values = np.array(["ab", "cdef", np.nan], dtype=object)
expected = Dataset({"x": ("t", values)})
original = Dataset({"x": ("t", values, {}, {"_FillValue": fill_value})})
> with self.roundtrip(original) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1408:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp59n6gpo6/temp-20.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
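Separately from the failure itself, the chunks and cache semantics repeated in each docstring above can be summarised in a short usage sketch ("store.nc" is an illustrative path; the chunked variants require dask):

import xarray as xr

ds_default = xr.open_dataset("store.nc")                # NumPy-backed, cache defaults to True
ds_preferred = xr.open_dataset("store.nc", chunks={})   # engine-preferred dask chunks
ds_single = xr.open_dataset("store.nc", chunks=-1)      # one dask chunk per array
ds_auto = xr.open_dataset("store.nc", chunks="auto")    # dask "auto" chunking
# As documented above, supplying chunks switches the default cache to False.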
Check warning on line 0 in xarray.tests.test_backends.TestNetCDF4Data
github-actions / Test Results
12 out of 13 runs failed: test_zero_dimensional_variable (xarray.tests.test_backends.TestNetCDF4Data)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestNetCDF4Data object at 0x7f89db7bedb0>
def test_zero_dimensional_variable(self) -> None:
expected = create_test_data()
expected["float_var"] = ([], 1.0e9, {"units": "units of awesome"})
expected["bytes_var"] = ([], b"foobar")
expected["string_var"] = ([], "foobar")
> with self.roundtrip(expected) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:346:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmp844xz1xw/temp-0.nc', engine = 'netcdf4'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
def open_dataset(
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
*,
engine: T_Engine = None,
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
set_indexes: bool = True,
inline_array: bool = False,
chunked_array_type: str | None = None,
from_array_kwargs: dict[str, Any] | None = None,
backend_kwargs: dict[str, Any] | None = None,
**kwargs,
) -> Dataset:
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
filename_or_obj : str, Path, file-like or DataStore
Strings and Path objects are interpreted as a path to a netCDF file
or an OpenDAP URL and opened with python-netCDF4, unless the filename
ends with .gz, in which case the file is gunzipped and opened with
scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
"zarr", None}, installed backend \
or subclass of xarray.backends.BackendEntrypoint, optional
Engine to use when reading files. If not provided, the default engine
is chosen based on available dependencies, with a preference for
"netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``)
can also be used.
chunks : int, dict, 'auto' or None, optional
If chunks is provided, it is used to load the new dataset into dask
arrays. ``chunks=-1`` loads the dataset with dask using a single
chunk for all arrays. ``chunks={}`` loads the dataset with dask using
engine preferred chunks if exposed by the backend, otherwise with
a single chunk for all arrays. In order to reproduce the default behavior
of ``xr.open_zarr(...)`` use ``xr.open_dataset(..., engine='zarr', chunks={})``.
``chunks='auto'`` will use dask ``auto`` chunking taking into account the
engine preferred chunks. See dask chunking for more details.
cache : bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False. Does not
change the behavior of coordinates corresponding to dimensions, which
always load their data from disk into a ``pandas.Index``.
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of decode_times.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
``np.datetime64[ns]`` objects; if this is not possible, decode times to
``cftime.datetime`` objects. If True, always decode times to
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
- "coordinates" or True: Set variables referred to in the
``'coordinates'`` attribute of the datasets or individual variables
as coordinate variables.
- "all": Set variables referred to in ``'grid_mapping'``, ``'bounds'`` and
other attributes as coordinate variables.
Only existing variables can be set as coordinates. Missing variables
will be silently ignored.
drop_variables: str or iterable of str, optional
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
set_indexes : bool, optional
If True (default), create new indexes from coordinates. Both the number and
the type(s) of those indexes depend on the backend used to open the dataset.
For most common backends this creates a pandas index for each
:term:`Dimension coordinate`, which loads the coordinate data fully in memory.
Set it to False if you want to avoid loading data into memory.
inline_array: bool, default: False
How to include the array in the dask task graph.
By default (``inline_array=False``) the array is included in a task by
itself, and each chunk refers to that task by its key. With
``inline_array=True``, Dask will instead inline the array directly
in the values of the task graph. See :py:func:`dask.array.from_array`.
chunked_array_type: str, optional
Which chunked array type to coerce this dataset's arrays to.
Defaults to 'dask' if installed, else whatever is registered via the `ChunkManagerEntrypoint` system.
Experimental API that should not be relied upon.
from_array_kwargs: dict
Additional keyword arguments passed on to the `ChunkManagerEntrypoint.from_array` method used to create
chunked arrays, via whichever chunk manager is specified through the `chunked_array_type` kwarg.
For example if :py:func:`dask.array.Array` objects are used for chunking, additional kwargs will be passed
to :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
backend_kwargs: dict
Additional keyword arguments passed on to the engine open function,
equivalent to `**kwargs`.
**kwargs: dict
Additional keyword arguments passed on to the engine open function.
For example:
- 'group': path to the netCDF4 group in the given file to open given as
a str, supported by "netcdf4", "h5netcdf", "zarr".
- 'lock': resource lock to use when reading data from disk. Only
relevant when using dask or another form of parallelism. By default,
appropriate locks are chosen to safely read and write files with the
currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
"scipy", "pynio".
See engine open function for kwargs accepted by each specific engine.
Returns
-------
dataset : Dataset
The newly created dataset.
Notes
-----
``open_dataset`` opens the file with read-only access. When you modify
values of a Dataset, even one linked to files on disk, only the in-memory
copy you are manipulating in xarray is modified: the original file on disk
is never touched.
See Also
--------
open_mfdataset
"""
if cache is None:
cache = chunks is None
if backend_kwargs is not None:
kwargs.update(backend_kwargs)
if engine is None:
engine = plugins.guess_engine(filename_or_obj)
if from_array_kwargs is None:
from_array_kwargs = {}
backend = plugins.get_backend(engine)
decoders = _resolve_decoders_kwargs(
decode_cf,
open_backend_dataset_parameters=backend.open_dataset_parameters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
decode_timedelta=decode_timedelta,
concat_characters=concat_characters,
use_cftime=use_cftime,
decode_coords=decode_coords,
)
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
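All of the failures in this report share the same root cause: `open_dataset` now forwards the new `set_indexes` keyword to every backend entrypoint, and the built-in entrypoints do not accept it yet. For context, a minimal sketch of how the keyword documented above is intended to be used once the backends support it; the file name is a hypothetical placeholder:

    import xarray as xr

    # "data.nc" is a placeholder path; set_indexes is the keyword added to
    # open_dataset on this PR branch (see the signature above).
    ds = xr.open_dataset("data.nc", set_indexes=False)
    # With set_indexes=False no pandas indexes are created, so dimension
    # coordinates are not loaded eagerly; indexes can still be built later,
    # e.g. with ds.set_xindex on recent xarray versions.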
Check warning on line 0 in xarray.tests.test_backends.TestZarrWriteEmpty
github-actions / Test Results
9 out of 13 runs failed: test_coordinates_encoding (xarray.tests.test_backends.TestZarrWriteEmpty)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestZarrWriteEmpty object at 0x7f38371783b0>
def test_coordinates_encoding(self) -> None:
def equals_latlon(obj):
return obj == "lat lon" or obj == "lon lat"
original = Dataset(
{"temp": ("x", [0, 1]), "precip": ("x", [0, -1])},
{"lat": ("x", [2, 3]), "lon": ("x", [4, 5])},
)
with self.roundtrip(original) as actual:
assert_identical(actual, original)
with create_tmp_file() as tmp_file:
original.to_netcdf(tmp_file)
> with open_dataset(tmp_file, decode_coords=False) as ds:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1048:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpxbnyw4e_/temp-113.nc', engine = 'netcdf4'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
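The repeated TypeError comes from the call at xarray/backends/api.py:579, which passes set_indexes unconditionally while entrypoints such as NetCDF4BackendEntrypoint.open_dataset() do not declare that keyword. A hedged sketch of an entrypoint signature that would tolerate the keyword, purely for illustration (ExampleBackendEntrypoint is a made-up class, not part of xarray or of this PR):

    from xarray.backends import BackendEntrypoint

    class ExampleBackendEntrypoint(BackendEntrypoint):
        # Hypothetical entrypoint, only to illustrate the mismatch: an
        # open_dataset that declares set_indexes (or absorbs it via **kwargs)
        # would not raise the TypeError seen in these tracebacks.
        def open_dataset(
            self,
            filename_or_obj,
            *,
            drop_variables=None,
            set_indexes=True,
            **kwargs,
        ):
            raise NotImplementedError("illustration only")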
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_roundtrip_string_with_fill_value_vlen[b\xe1r] (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fdd0cb0>
fill_value = 'bár'
@pytest.mark.parametrize("fill_value", ["XXX", "", "bár"])
def test_roundtrip_string_with_fill_value_vlen(self, fill_value: str) -> None:
values = np.array(["ab", "cdef", np.nan], dtype=object)
expected = Dataset({"x": ("t", values)})
original = Dataset({"x": ("t", values, {}, {"_FillValue": fill_value})})
> with self.roundtrip(original) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1408:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpk3642_md/temp-21.nc', engine = 'h5netcdf'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: H5netcdfBackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
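An alternative to updating every entrypoint would be to guard the call site itself. The traceback already consults backend.open_dataset_parameters when resolving decoder keywords, so a similar check could gate set_indexes. This is only a sketch of one possible approach, assuming open_dataset_parameters lists the keyword names the entrypoint accepts (it can be None for backends that take **kwargs); it is not the change actually made in this PR:

    # Possible guard in xarray/backends/api.py around the failing call:
    extra_kwargs = {}
    if "set_indexes" in (backend.open_dataset_parameters or ()):
        extra_kwargs["set_indexes"] = set_indexes

    backend_ds = backend.open_dataset(
        filename_or_obj,
        drop_variables=drop_variables,
        **extra_kwargs,
        **decoders,
        **kwargs,
    )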
Check warning on line 0 in xarray.tests.test_backends.TestNetCDF4Data
github-actions / Test Results
12 out of 13 runs failed: test_roundtrip_test_data (xarray.tests.test_backends.TestNetCDF4Data)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestNetCDF4Data object at 0x7f89db7beed0>
def test_roundtrip_test_data(self) -> None:
expected = create_test_data()
> with self.roundtrip(expected) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:379:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:312: in roundtrip
with self.open(path, **open_kwargs) as ds:
/home/runner/micromamba/envs/xarray-tests/lib/python3.12/contextlib.py:137: in __enter__
return next(self.gen)
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:338: in open
with open_dataset(path, engine=self.engine, **kwargs) as ds:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmpug1xax4_/temp-2.nc', engine = 'netcdf4'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
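Separately from the failure, the **kwargs section of the docstring shown earlier documents engine-specific options such as 'group', and the chunks={} idiom for reproducing the default behavior of open_zarr. A short hedged example of both; the file names and group path are placeholders:

    import xarray as xr

    # 'group' is forwarded to the backend ("netcdf4", "h5netcdf", "zarr").
    ds = xr.open_dataset("data.nc", engine="h5netcdf", group="sub/group")

    # chunks={} uses the engine-preferred chunks, matching open_zarr defaults.
    ds_z = xr.open_dataset("store.zarr", engine="zarr", chunks={})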
Check warning on line 0 in xarray.tests.test_backends.TestH5NetCDFData
github-actions / Test Results
12 out of 13 runs failed: test_roundtrip_character_array (xarray.tests.test_backends.TestH5NetCDFData)
artifacts/Test results for Linux-3.10 all-but-dask/pytest.xml [took 0s]
artifacts/Test results for Linux-3.10 flaky/pytest.xml [took 0s]
artifacts/Test results for Linux-3.11/pytest.xml [took 0s]
artifacts/Test results for Linux-3.12/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9 min-all-deps/pytest.xml [took 0s]
artifacts/Test results for Linux-3.9/pytest.xml [took 0s]
artifacts/Test results for Windows-3.11/pytest.xml [took 0s]
artifacts/Test results for Windows-3.12/pytest.xml [took 0s]
artifacts/Test results for Windows-3.9/pytest.xml [took 0s]
artifacts/Test results for macOS-3.11/pytest.xml [took 0s]
artifacts/Test results for macOS-3.12/pytest.xml [took 0s]
artifacts/Test results for macOS-3.9/pytest.xml [took 0s]
Raw output
TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
self = <xarray.tests.test_backends.TestH5NetCDFData object at 0x7f3d6fdd0d70>
def test_roundtrip_character_array(self) -> None:
with create_tmp_file() as tmp_file:
values = np.array([["a", "b", "c"], ["d", "e", "f"]], dtype="S")
with nc4.Dataset(tmp_file, mode="w") as nc:
nc.createDimension("x", 2)
nc.createDimension("string3", 3)
v = nc.createVariable("x", np.dtype("S1"), ("x", "string3"))
v[:] = values
values = np.array(["abc", "def"], dtype="S")
expected = Dataset({"x": ("x", values)})
> with open_dataset(tmp_file) as actual:
/home/runner/work/xarray/xarray/xarray/tests/test_backends.py:1427:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
filename_or_obj = '/tmp/tmplb30robe/temp-22.nc', engine = 'netcdf4'
chunks = None, cache = True, decode_cf = None, mask_and_scale = None
decode_times = None, decode_timedelta = None, use_cftime = None
> backend_ds = backend.open_dataset(
filename_or_obj,
drop_variables=drop_variables,
set_indexes=set_indexes,
**decoders,
**kwargs,
)
E TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected keyword argument 'set_indexes'
/home/runner/work/xarray/xarray/xarray/backends/api.py:579: TypeError
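Because api.py forwards set_indexes even when callers rely on the defaults, tests such as test_roundtrip_character_array fail without ever passing the keyword themselves. A minimal reproduction sketch for this branch, using a throwaway file name:

    import numpy as np
    import xarray as xr

    # Any read through the netcdf4 or h5netcdf backend hits the error until the
    # entrypoints accept (or ignore) the new keyword.
    ds = xr.Dataset({"x": ("y", np.arange(3.0))})
    ds.to_netcdf("tmp.nc")
    xr.open_dataset("tmp.nc", engine="netcdf4")
    # TypeError: NetCDF4BackendEntrypoint.open_dataset() got an unexpected
    # keyword argument 'set_indexes'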