Skip to content

Commit

Permalink
Lazy Imports (pydata#7179)
Browse files Browse the repository at this point in the history
* fix typing of BackendEntrypoint

* make backends lazy

* make matplotlib lazy and add tests for lazy modules

* make flox lazy

* fix generated docs on windows...

* try fixing test

* make pycompat lazy

* make dask.array lazy

* add import xarray without numpy or pandas benchmark

* improve error reporting in test

* fix import benchmark

* add lazy import to whats-new

* fix lazy import test

* fix typos

* fix windows stuff again
  • Loading branch information
headtr1ck authored Oct 28, 2022
1 parent fc9026b commit f32d354
Show file tree
Hide file tree
Showing 33 changed files with 445 additions and 241 deletions.
12 changes: 6 additions & 6 deletions asv_bench/benchmarks/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@ class Import:
"""Benchmark importing xarray"""

def timeraw_import_xarray(self):
return """
import xarray
"""
return "import xarray"

def timeraw_import_xarray_plot(self):
return """
import xarray.plot
"""
return "import xarray.plot"

def timeraw_import_xarray_backends(self):
return """
from xarray.backends import list_engines
list_engines()
"""

def timeraw_import_xarray_only(self):
# import numpy and pandas in the setup stage
return "import xarray", "import numpy, pandas"
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ Internal Changes
encoding times to preserve existing behavior and prevent future errors when it
is eventually set to ``True`` by default in cftime (:pull:`7171`). By
`Spencer Clark <https://github.com/spencerkclark>`_.
- Improved import time by lazily importing backend modules, matplotlib, dask.array and flox. (:issue:`6726`, :pull:`7179`)
By `Michael Niklas <https://github.com/headtr1ck>`_.
- Emit a warning under the development version of pandas when we convert
non-nanosecond precision datetime or timedelta values to nanosecond precision.
This was required in the past, because pandas previously was not compatible
Expand Down
27 changes: 11 additions & 16 deletions xarray/backends/cfgrib_.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from ..core import indexing
from ..core.utils import Frozen, FrozenDict, close_on_error
from ..core.utils import Frozen, FrozenDict, close_on_error, module_available
from ..core.variable import Variable
from .common import (
BACKEND_ENTRYPOINTS,
Expand All @@ -18,20 +18,6 @@
from .locks import SerializableLock, ensure_lock
from .store import StoreBackendEntrypoint

try:
import cfgrib

has_cfgrib = True
except ModuleNotFoundError:
has_cfgrib = False
# cfgrib throws a RuntimeError if eccodes is not installed
except (ImportError, RuntimeError):
warnings.warn(
"Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. "
"Try `import cfgrib` to get the full error message"
)
has_cfgrib = False

# FIXME: Add a dedicated lock, even if ecCodes is supposed to be thread-safe
# in most circumstances. See:
# https://confluence.ecmwf.int/display/ECC/Frequently+Asked+Questions
Expand Down Expand Up @@ -61,6 +47,15 @@ class CfGribDataStore(AbstractDataStore):
"""

def __init__(self, filename, lock=None, **backend_kwargs):
try:
import cfgrib
# cfgrib throws a RuntimeError if eccodes is not installed
except (ImportError, RuntimeError) as err:
warnings.warn(
"Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. "
"Try `import cfgrib` to get the full error message"
)
raise err

if lock is None:
lock = ECCODES_LOCK
Expand Down Expand Up @@ -96,7 +91,7 @@ def get_encoding(self):


class CfgribfBackendEntrypoint(BackendEntrypoint):
available = has_cfgrib
available = module_available("cfgrib")

def guess_can_open(self, filename_or_obj):
try:
Expand Down
15 changes: 9 additions & 6 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,22 +376,25 @@ class BackendEntrypoint:
Attributes
----------
open_dataset_parameters : tuple, default None
available : bool, default: True
Indicate whether this backend is available given the installed packages.
The setting of this attribute is not mandatory.
open_dataset_parameters : tuple, default: None
A list of ``open_dataset`` method parameters.
The setting of this attribute is not mandatory.
description : str
description : str, default: ""
A short string describing the engine.
The setting of this attribute is not mandatory.
url : str
url : str, default: ""
A string with the URL to the backend's documentation.
The setting of this attribute is not mandatory.
"""

available: ClassVar[bool] = True

open_dataset_parameters: tuple | None = None
description: str = ""
url: str = ""
open_dataset_parameters: ClassVar[tuple | None] = None
description: ClassVar[str] = ""
url: ClassVar[str] = ""

def __repr__(self) -> str:
txt = f"<{type(self).__name__}>"
Expand Down
19 changes: 8 additions & 11 deletions xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ..core.utils import (
FrozenDict,
is_remote_uri,
module_available,
read_magic_number_from_file,
try_read_magic_number_from_file_or_path,
)
Expand All @@ -33,16 +34,6 @@
)
from .store import StoreBackendEntrypoint

try:
import h5netcdf

has_h5netcdf = True
except ImportError:
# Except a base ImportError (not ModuleNotFoundError) to catch use cases
# where errors have mismatched versions of c-dependencies. This can happen
# when developers are making changes to them.
has_h5netcdf = False


class H5NetCDFArrayWrapper(BaseNetCDF4Array):
def get_array(self, needs_lock=True):
Expand Down Expand Up @@ -110,6 +101,7 @@ class H5NetCDFStore(WritableCFDataStore):
)

def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=False):
import h5netcdf

if isinstance(manager, (h5netcdf.File, h5netcdf.Group)):
if group is None:
Expand Down Expand Up @@ -147,6 +139,7 @@ def open(
phony_dims=None,
decode_vlen_strings=True,
):
import h5netcdf

if isinstance(filename, bytes):
raise ValueError(
Expand Down Expand Up @@ -237,12 +230,16 @@ def get_attrs(self):
return FrozenDict(_read_attributes(self.ds))

def get_dimensions(self):
import h5netcdf

if Version(h5netcdf.__version__) >= Version("0.14.0.dev0"):
return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items())
else:
return self.ds.dimensions

def get_encoding(self):
import h5netcdf

if Version(h5netcdf.__version__) >= Version("0.14.0.dev0"):
return {
"unlimited_dims": {
Expand Down Expand Up @@ -373,7 +370,7 @@ class H5netcdfBackendEntrypoint(BackendEntrypoint):
backends.ScipyBackendEntrypoint
"""

available = has_h5netcdf
available = module_available("h5netcdf")
description = (
"Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using h5netcdf in Xarray"
)
Expand Down
16 changes: 4 additions & 12 deletions xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
FrozenDict,
close_on_error,
is_remote_uri,
module_available,
try_read_magic_number_from_path,
)
from ..core.variable import Variable
Expand All @@ -31,17 +32,6 @@
from .netcdf3 import encode_nc3_attr_value, encode_nc3_variable
from .store import StoreBackendEntrypoint

try:
import netCDF4

has_netcdf4 = True
except ImportError:
# Except a base ImportError (not ModuleNotFoundError) to catch use cases
# where errors have mismatched versions of c-dependencies. This can happen
# when developers are making changes to them.
has_netcdf4 = False


# This lookup table maps from dtype.byteorder to a readable endian
# string used by netCDF4.
_endian_lookup = {"=": "native", ">": "big", "<": "little", "|": "native"}
Expand Down Expand Up @@ -313,6 +303,7 @@ class NetCDF4DataStore(WritableCFDataStore):
def __init__(
self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False
):
import netCDF4

if isinstance(manager, netCDF4.Dataset):
if group is None:
Expand Down Expand Up @@ -349,6 +340,7 @@ def open(
lock_maker=None,
autoclose=False,
):
import netCDF4

if isinstance(filename, os.PathLike):
filename = os.fspath(filename)
Expand Down Expand Up @@ -537,7 +529,7 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint):
backends.ScipyBackendEntrypoint
"""

available = has_netcdf4
available = module_available("netCDF4")
description = (
"Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray"
)
Expand Down
13 changes: 3 additions & 10 deletions xarray/backends/pseudonetcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np

from ..core import indexing
from ..core.utils import Frozen, FrozenDict, close_on_error
from ..core.utils import Frozen, FrozenDict, close_on_error, module_available
from ..core.variable import Variable
from .common import (
BACKEND_ENTRYPOINTS,
Expand All @@ -16,14 +16,6 @@
from .locks import HDF5_LOCK, NETCDFC_LOCK, combine_locks, ensure_lock
from .store import StoreBackendEntrypoint

try:
from PseudoNetCDF import pncopen

has_pseudonetcdf = True
except ModuleNotFoundError:
has_pseudonetcdf = False


# pseudonetcdf can invoke netCDF libraries internally
PNETCDF_LOCK = combine_locks([HDF5_LOCK, NETCDFC_LOCK])

Expand Down Expand Up @@ -56,6 +48,7 @@ class PseudoNetCDFDataStore(AbstractDataStore):

@classmethod
def open(cls, filename, lock=None, mode=None, **format_kwargs):
from PseudoNetCDF import pncopen

keywords = {"kwargs": format_kwargs}
# only include mode if explicitly passed
Expand Down Expand Up @@ -128,7 +121,7 @@ class PseudoNetCDFBackendEntrypoint(BackendEntrypoint):
backends.PseudoNetCDFDataStore
"""

available = has_pseudonetcdf
available = module_available("PseudoNetCDF")
description = (
"Open many atmospheric science data formats using PseudoNetCDF in Xarray"
)
Expand Down
24 changes: 12 additions & 12 deletions xarray/backends/pydap_.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@

from ..core import indexing
from ..core.pycompat import integer_types
from ..core.utils import Frozen, FrozenDict, close_on_error, is_dict_like, is_remote_uri
from ..core.utils import (
Frozen,
FrozenDict,
close_on_error,
is_dict_like,
is_remote_uri,
module_available,
)
from ..core.variable import Variable
from .common import (
BACKEND_ENTRYPOINTS,
Expand All @@ -16,15 +23,6 @@
)
from .store import StoreBackendEntrypoint

try:
import pydap.client
import pydap.lib

pydap_version = pydap.lib.__version__
has_pydap = True
except ModuleNotFoundError:
has_pydap = False


class PydapArrayWrapper(BackendArray):
def __init__(self, array):
Expand Down Expand Up @@ -101,6 +99,8 @@ def open(
verify=None,
user_charset=None,
):
import pydap.client
import pydap.lib

if timeout is None:
from pydap.lib import DEFAULT_TIMEOUT
Expand All @@ -114,7 +114,7 @@ def open(
"output_grid": output_grid or True,
"timeout": timeout,
}
if Version(pydap_version) >= Version("3.3.0"):
if Version(pydap.lib.__version__) >= Version("3.3.0"):
if verify is not None:
kwargs.update({"verify": verify})
if user_charset is not None:
Expand Down Expand Up @@ -154,7 +154,7 @@ class PydapBackendEntrypoint(BackendEntrypoint):
backends.PydapDataStore
"""

available = has_pydap
available = module_available("pydap")
description = "Open remote datasets via OPeNDAP using pydap in Xarray"
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.PydapBackendEntrypoint.html"

Expand Down
13 changes: 3 additions & 10 deletions xarray/backends/pynio_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np

from ..core import indexing
from ..core.utils import Frozen, FrozenDict, close_on_error
from ..core.utils import Frozen, FrozenDict, close_on_error, module_available
from ..core.variable import Variable
from .common import (
BACKEND_ENTRYPOINTS,
Expand All @@ -16,14 +16,6 @@
from .locks import HDF5_LOCK, NETCDFC_LOCK, SerializableLock, combine_locks, ensure_lock
from .store import StoreBackendEntrypoint

try:
import Nio

has_pynio = True
except ModuleNotFoundError:
has_pynio = False


# PyNIO can invoke netCDF libraries internally
# Add a dedicated lock just in case NCL as well isn't thread-safe.
NCL_LOCK = SerializableLock()
Expand Down Expand Up @@ -61,6 +53,7 @@ class NioDataStore(AbstractDataStore):
"""Store for accessing datasets via PyNIO"""

def __init__(self, filename, mode="r", lock=None, **kwargs):
import Nio

if lock is None:
lock = PYNIO_LOCK
Expand Down Expand Up @@ -101,7 +94,7 @@ def close(self):


class PynioBackendEntrypoint(BackendEntrypoint):
available = has_pynio
available = module_available("Nio")

def open_dataset(
self,
Expand Down
Loading

0 comments on commit f32d354

Please sign in to comment.