From 8a23e249d44b6e677600244efed4491a2749db73 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Thu, 25 Apr 2024 08:23:59 -0600
Subject: [PATCH] Delete pynio backend. (#8971)

* Delete pynio backend.

* cleanup test

* fix whats-new
---
 .binder/environment.yml                  |   1 -
 doc/getting-started-guide/installing.rst |   3 -
 doc/user-guide/io.rst                    |  21 ---
 doc/whats-new.rst                        |   5 +-
 xarray/backends/__init__.py              |   2 -
 xarray/backends/api.py                   |  19 ++-
 xarray/backends/pynio_.py                | 164 -----------------------
 xarray/tests/__init__.py                 |   1 -
 xarray/tests/test_backends.py            |  47 +------
 xarray/tests/test_plugins.py             |   2 -
 10 files changed, 16 insertions(+), 249 deletions(-)
 delete mode 100644 xarray/backends/pynio_.py

diff --git a/.binder/environment.yml b/.binder/environment.yml
index 053b12dfc86..fee5ed07cf7 100644
--- a/.binder/environment.yml
+++ b/.binder/environment.yml
@@ -28,7 +28,6 @@ dependencies:
   - pip
   - pooch
   - pydap
-  - pynio
   - rasterio
   - scipy
   - seaborn
diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst
index f7eaf92f9cf..ca12ae62440 100644
--- a/doc/getting-started-guide/installing.rst
+++ b/doc/getting-started-guide/installing.rst
@@ -31,9 +31,6 @@ For netCDF and IO
 - `pydap `__: used as a fallback for accessing OPeNDAP
 - `h5netcdf `__: an alternative library for
   reading and writing netCDF4 files that does not use the netCDF-C libraries
-- `PyNIO `__: for reading GRIB and other
-  geoscience specific file formats. Note that PyNIO is not available for Windows and
-  that the PyNIO backend may be moved outside of xarray in the future.
 - `zarr `__: for chunked, compressed, N-dimensional arrays.
 - `cftime `__: recommended if you
   want to encode/decode datetimes for non-standard calendars or dates before
diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst
index 48751c5f299..63bf8b80d81 100644
--- a/doc/user-guide/io.rst
+++ b/doc/user-guide/io.rst
@@ -1294,27 +1294,6 @@ We recommend installing cfgrib via conda::

 .. _cfgrib: https://github.com/ecmwf/cfgrib

-.. _io.pynio:
-
-Formats supported by PyNIO
---------------------------
-
-.. warning::
-
-    The `PyNIO backend is deprecated`_. `PyNIO is no longer maintained`_.
-
-Xarray can also read GRIB, HDF4 and other file formats supported by PyNIO_,
-if PyNIO is installed. To use PyNIO to read such files, supply
-``engine='pynio'`` to :py:func:`open_dataset`.
-
-We recommend installing PyNIO via conda::
-
-    conda install -c conda-forge pynio
-
-.. _PyNIO: https://www.pyngl.ucar.edu/Nio.shtml
-.. _PyNIO backend is deprecated: https://github.com/pydata/xarray/issues/4491
-.. _PyNIO is no longer maintained: https://github.com/NCAR/pynio/issues/53
-
 CSV and other formats supported by pandas
 -----------------------------------------
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 2332f7f236b..413ff091d01 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -32,6 +32,8 @@ New Features

 Breaking changes
 ~~~~~~~~~~~~~~~~
+- The PyNIO backend has been deleted (:issue:`4491`, :pull:`7301`).
+  By `Deepak Cherian `_.


 Bug fixes
@@ -6806,8 +6808,7 @@ Enhancements
   datasets with a MultiIndex to a netCDF file. User contributions in this
   area would be greatly appreciated.
-- Support for reading GRIB, HDF4 and other file formats via PyNIO_. See
-  :ref:`io.pynio` for more details.
+- Support for reading GRIB, HDF4 and other file formats via PyNIO_.
 - Better error message when a variable is supplied with the same name as
   one of its dimensions.
 - Plotting: more control on colormap parameters (:issue:`642`). ``vmin`` and
diff --git a/xarray/backends/__init__.py b/xarray/backends/__init__.py
index 1c8d2d3a659..550b9e29e42 100644
--- a/xarray/backends/__init__.py
+++ b/xarray/backends/__init__.py
@@ -15,7 +15,6 @@
 from xarray.backends.netCDF4_ import NetCDF4BackendEntrypoint, NetCDF4DataStore
 from xarray.backends.plugins import list_engines, refresh_engines
 from xarray.backends.pydap_ import PydapBackendEntrypoint, PydapDataStore
-from xarray.backends.pynio_ import NioDataStore
 from xarray.backends.scipy_ import ScipyBackendEntrypoint, ScipyDataStore
 from xarray.backends.store import StoreBackendEntrypoint
 from xarray.backends.zarr import ZarrBackendEntrypoint, ZarrStore
@@ -30,7 +29,6 @@
     "InMemoryDataStore",
     "NetCDF4DataStore",
     "PydapDataStore",
-    "NioDataStore",
     "ScipyDataStore",
     "H5NetCDFStore",
     "ZarrStore",
diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 2589ff196f9..62085fe5e2a 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -61,7 +61,7 @@ T_NetcdfEngine = Literal["netcdf4", "scipy", "h5netcdf"]
 T_Engine = Union[
     T_NetcdfEngine,
-    Literal["pydap", "pynio", "zarr"],
+    Literal["pydap", "zarr"],
     type[BackendEntrypoint],
     str,  # no nice typing support for custom backends
     None,
 ]
@@ -79,7 +79,6 @@
     "scipy": backends.ScipyDataStore,
     "pydap": backends.PydapDataStore.open,
     "h5netcdf": backends.H5NetCDFStore.open,
-    "pynio": backends.NioDataStore,
     "zarr": backends.ZarrStore.open_group,
 }

@@ -420,8 +419,8 @@ def open_dataset(
         ends with .gz, in which case the file is gunzipped and opened with
         scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
         objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
-    engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
-        "zarr", None}, installed backend \
+    engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
+        , installed backend \
         or subclass of xarray.backends.BackendEntrypoint, optional
         Engine to use when reading files. If not provided, the default engine
         is chosen based on available dependencies, with a preference for
@@ -523,7 +522,7 @@
         relevant when using dask or another form of parallelism. By default,
         appropriate locks are chosen to safely read and write files with the
         currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
-        "scipy", "pynio".
+        "scipy".

         See engine open function for kwargs accepted by each specific engine.

@@ -627,8 +626,8 @@ def open_dataarray(
         ends with .gz, in which case the file is gunzipped and opened with
         scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
         objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
-    engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
-        "zarr", None}, installed backend \
+    engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
+        , installed backend \
         or subclass of xarray.backends.BackendEntrypoint, optional
         Engine to use when reading files. If not provided, the default engine
         is chosen based on available dependencies, with a preference for
@@ -728,7 +727,7 @@
         relevant when using dask or another form of parallelism. By default,
         appropriate locks are chosen to safely read and write files with the
         currently active dask scheduler. Supported by "netcdf4", "h5netcdf",
-        "scipy", "pynio".
+        "scipy".

         See engine open function for kwargs accepted by each specific engine.

@@ -897,8 +896,8 @@ def open_mfdataset(
         If provided, call this function on each dataset prior to concatenation.
         You can find the file-name from which each dataset was loaded in
         ``ds.encoding["source"]``.
-    engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \
-        "zarr", None}, installed backend \
+    engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "zarr", None}\
+        , installed backend \
         or subclass of xarray.backends.BackendEntrypoint, optional
         Engine to use when reading files. If not provided, the default engine
         is chosen based on available dependencies, with a preference for
diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py
deleted file mode 100644
index 75e96ffdc0a..00000000000
--- a/xarray/backends/pynio_.py
+++ /dev/null
@@ -1,164 +0,0 @@
-from __future__ import annotations
-
-import warnings
-from collections.abc import Iterable
-from typing import TYPE_CHECKING, Any
-
-import numpy as np
-
-from xarray.backends.common import (
-    BACKEND_ENTRYPOINTS,
-    AbstractDataStore,
-    BackendArray,
-    BackendEntrypoint,
-    _normalize_path,
-)
-from xarray.backends.file_manager import CachingFileManager
-from xarray.backends.locks import (
-    HDF5_LOCK,
-    NETCDFC_LOCK,
-    SerializableLock,
-    combine_locks,
-    ensure_lock,
-)
-from xarray.backends.store import StoreBackendEntrypoint
-from xarray.core import indexing
-from xarray.core.utils import Frozen, FrozenDict, close_on_error
-from xarray.core.variable import Variable
-
-if TYPE_CHECKING:
-    import os
-    from io import BufferedIOBase
-
-    from xarray.core.dataset import Dataset
-
-# PyNIO can invoke netCDF libraries internally
-# Add a dedicated lock just in case NCL as well isn't thread-safe.
-NCL_LOCK = SerializableLock()
-PYNIO_LOCK = combine_locks([HDF5_LOCK, NETCDFC_LOCK, NCL_LOCK])
-
-
-class NioArrayWrapper(BackendArray):
-    def __init__(self, variable_name, datastore):
-        self.datastore = datastore
-        self.variable_name = variable_name
-        array = self.get_array()
-        self.shape = array.shape
-        self.dtype = np.dtype(array.typecode())
-
-    def get_array(self, needs_lock=True):
-        ds = self.datastore._manager.acquire(needs_lock)
-        return ds.variables[self.variable_name]
-
-    def __getitem__(self, key):
-        return indexing.explicit_indexing_adapter(
-            key, self.shape, indexing.IndexingSupport.BASIC, self._getitem
-        )
-
-    def _getitem(self, key):
-        with self.datastore.lock:
-            array = self.get_array(needs_lock=False)
-
-            if key == () and self.ndim == 0:
-                return array.get_value()
-
-            return array[key]
-
-
-class NioDataStore(AbstractDataStore):
-    """Store for accessing datasets via PyNIO"""
-
-    def __init__(self, filename, mode="r", lock=None, **kwargs):
-        import Nio
-
-        warnings.warn(
-            "The PyNIO backend is Deprecated and will be removed from Xarray in a future release. "
-            "See https://github.com/pydata/xarray/issues/4491 for more information",
-            DeprecationWarning,
-        )
-
-        if lock is None:
-            lock = PYNIO_LOCK
-        self.lock = ensure_lock(lock)
-        self._manager = CachingFileManager(
-            Nio.open_file, filename, lock=lock, mode=mode, kwargs=kwargs
-        )
-        # xarray provides its own support for FillValue,
-        # so turn off PyNIO's support for the same.
-        self.ds.set_option("MaskedArrayMode", "MaskedNever")
-
-    @property
-    def ds(self):
-        return self._manager.acquire()
-
-    def open_store_variable(self, name, var):
-        data = indexing.LazilyIndexedArray(NioArrayWrapper(name, self))
-        return Variable(var.dimensions, data, var.attributes)
-
-    def get_variables(self):
-        return FrozenDict(
-            (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items()
-        )
-
-    def get_attrs(self):
-        return Frozen(self.ds.attributes)
-
-    def get_dimensions(self):
-        return Frozen(self.ds.dimensions)
-
-    def get_encoding(self):
-        return {
-            "unlimited_dims": {k for k in self.ds.dimensions if self.ds.unlimited(k)}
-        }
-
-    def close(self):
-        self._manager.close()
-
-
-class PynioBackendEntrypoint(BackendEntrypoint):
-    """
-    PyNIO backend
-
-    .. deprecated:: 0.20.0
-
-        Deprecated as PyNIO is no longer supported. See
-        https://github.com/pydata/xarray/issues/4491 for more information
-    """
-
-    def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
-        self,
-        filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
-        *,
-        mask_and_scale=True,
-        decode_times=True,
-        concat_characters=True,
-        decode_coords=True,
-        drop_variables: str | Iterable[str] | None = None,
-        use_cftime=None,
-        decode_timedelta=None,
-        mode="r",
-        lock=None,
-    ) -> Dataset:
-        filename_or_obj = _normalize_path(filename_or_obj)
-        store = NioDataStore(
-            filename_or_obj,
-            mode=mode,
-            lock=lock,
-        )
-
-        store_entrypoint = StoreBackendEntrypoint()
-        with close_on_error(store):
-            ds = store_entrypoint.open_dataset(
-                store,
-                mask_and_scale=mask_and_scale,
-                decode_times=decode_times,
-                concat_characters=concat_characters,
-                decode_coords=decode_coords,
-                drop_variables=drop_variables,
-                use_cftime=use_cftime,
-                decode_timedelta=decode_timedelta,
-            )
-        return ds
-
-
-BACKEND_ENTRYPOINTS["pynio"] = ("Nio", PynioBackendEntrypoint)
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index 3ce788dfb7f..26232471aaf 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -99,7 +99,6 @@ def _importorskip(
 )
 has_h5netcdf, requires_h5netcdf = _importorskip("h5netcdf")
-has_pynio, requires_pynio = _importorskip("Nio")
 has_cftime, requires_cftime = _importorskip("cftime")
 has_dask, requires_dask = _importorskip("dask")
 with warnings.catch_warnings():
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index be9b3ef0422..bfa26025fd8 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -75,7 +75,6 @@
     requires_netCDF4,
     requires_netCDF4_1_6_2_or_above,
     requires_pydap,
-    requires_pynio,
     requires_scipy,
     requires_scipy_or_netCDF4,
     requires_zarr,
@@ -3769,7 +3768,7 @@ def test_get_variable_list_empty_driver_kwds(self) -> None:
         assert "Temperature" in list(actual)


-@pytest.fixture(params=["scipy", "netcdf4", "h5netcdf", "pynio", "zarr"])
+@pytest.fixture(params=["scipy", "netcdf4", "h5netcdf", "zarr"])
 def readengine(request):
     return request.param
@@ -3818,8 +3817,6 @@ def tmp_store(request, tmp_path):
 def skip_if_not_engine(engine):
     if engine == "netcdf4":
         pytest.importorskip("netCDF4")
-    elif engine == "pynio":
-        pytest.importorskip("Nio")
     else:
         pytest.importorskip(engine)

@@ -3827,25 +3824,22 @@ def skip_if_not_engine(engine):
 @requires_dask
 @pytest.mark.filterwarnings("ignore:use make_scale(name) instead")
 @pytest.mark.xfail(reason="Flaky test. Very open to contributions on fixing this")
+@pytest.mark.skipif(ON_WINDOWS, reason="Skipping on Windows")
 def test_open_mfdataset_manyfiles(
     readengine, nfiles, parallel, chunks, file_cache_maxsize
 ):
     # skip certain combinations
     skip_if_not_engine(readengine)

-    if ON_WINDOWS:
-        pytest.skip("Skipping on Windows")
-
     randdata = np.random.randn(nfiles)
     original = Dataset({"foo": ("x", randdata)})
     # test standard open_mfdataset approach with too many files
     with create_tmp_files(nfiles) as tmpfiles:
-        writeengine = readengine if readengine != "pynio" else "netcdf4"
         # split into multiple sets of temp files
         for ii in original.x.values:
             subds = original.isel(x=slice(ii, ii + 1))
-            if writeengine != "zarr":
-                subds.to_netcdf(tmpfiles[ii], engine=writeengine)
+            if readengine != "zarr":
+                subds.to_netcdf(tmpfiles[ii], engine=readengine)
             else:  # if writeengine == "zarr":
                 subds.to_zarr(store=tmpfiles[ii])
@@ -4734,39 +4728,6 @@ def test_session(self) -> None:
         )


-@requires_scipy
-@requires_pynio
-class TestPyNio(CFEncodedBase, NetCDF3Only):
-    def test_write_store(self) -> None:
-        # pynio is read-only for now
-        pass
-
-    @contextlib.contextmanager
-    def open(self, path, **kwargs):
-        with open_dataset(path, engine="pynio", **kwargs) as ds:
-            yield ds
-
-    def test_kwargs(self) -> None:
-        kwargs = {"format": "grib"}
-        path = os.path.join(os.path.dirname(__file__), "data", "example")
-        with backends.NioDataStore(path, **kwargs) as store:
-            assert store._manager._kwargs["format"] == "grib"
-
-    def save(self, dataset, path, **kwargs):
-        return dataset.to_netcdf(path, engine="scipy", **kwargs)
-
-    def test_weakrefs(self) -> None:
-        example = Dataset({"foo": ("x", np.arange(5.0))})
-        expected = example.rename({"foo": "bar", "x": "y"})
-
-        with create_tmp_file() as tmp_file:
-            example.to_netcdf(tmp_file, engine="scipy")
-            on_disk = open_dataset(tmp_file, engine="pynio")
-            actual = on_disk.rename({"foo": "bar", "x": "y"})
-            del on_disk  # trigger garbage collection
-            assert_identical(actual, expected)
-
-
 class TestEncodingInvalid:
     def test_extract_nc4_variable_encoding(self) -> None:
         var = xr.Variable(("x",), [1, 2, 3], {}, {"foo": "bar"})
diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py
index b518c973d3a..8e1eb616cca 100644
--- a/xarray/tests/test_plugins.py
+++ b/xarray/tests/test_plugins.py
@@ -16,7 +16,6 @@
     has_h5netcdf,
     has_netCDF4,
     has_pydap,
-    has_pynio,
     has_scipy,
     has_zarr,
 )
@@ -280,7 +279,6 @@ def test_list_engines() -> None:
     assert ("netcdf4" in engines) == has_netCDF4
     assert ("pydap" in engines) == has_pydap
     assert ("zarr" in engines) == has_zarr
-    assert ("pynio" in engines) == has_pynio
     assert "store" in engines
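Migration note for users of ``engine="pynio"``: for GRIB data, the replacement this patch leaves in place is the cfgrib engine that doc/user-guide/io.rst already recommends. A minimal sketch, assuming cfgrib is installed (e.g. ``conda install -c conda-forge cfgrib``) and with ``example.grib`` standing in for a real GRIB file::

    import xarray as xr

    # cfgrib decodes GRIB1/GRIB2 through ecCodes, covering the main use
    # case the deleted PyNIO backend served
    ds = xr.open_dataset("example.grib", engine="cfgrib")
    print(ds)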
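Anyone who still needs PyNIO should note that removing ``BACKEND_ENTRYPOINTS["pynio"]`` above only deletes the built-in registration: xarray also discovers engines through the ``xarray.backends`` entry-point group, so the deleted ``PynioBackendEntrypoint`` could be vendored into a standalone package and keep working unchanged. A sketch, assuming a hypothetical ``xarray-pynio`` package that ships the removed ``pynio_.py`` module as ``xarray_pynio``::

    # pyproject.toml of the hypothetical xarray-pynio package
    [project.entry-points."xarray.backends"]
    pynio = "xarray_pynio:PynioBackendEntrypoint"

With that package installed, the engine appears in ``xr.backends.list_engines()`` and ``xr.open_dataset(path, engine="pynio")`` resolves to the plugin exactly as it previously resolved to the in-tree backend.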
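A small detail in the ``test_open_mfdataset_manyfiles`` hunk: the imperative ``pytest.skip`` call inside the test body becomes a declarative ``@pytest.mark.skipif`` decorator, which lets pytest mark the skip at collection time instead of after fixtures have run. A generic sketch of the pattern (the ``ON_WINDOWS`` flag is the one the test suite already defines)::

    import sys

    import pytest

    ON_WINDOWS = sys.platform == "win32"

    @pytest.mark.skipif(ON_WINDOWS, reason="Skipping on Windows")
    def test_something():
        # never collected as a running test on Windows; no setup work is wasted
        assert 1 + 1 == 2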