Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lazy Imports #7179

Merged
merged 18 commits into from
Oct 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions asv_bench/benchmarks/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@ class Import:
"""Benchmark importing xarray"""

def timeraw_import_xarray(self):
return """
import xarray
"""
return "import xarray"

def timeraw_import_xarray_plot(self):
return """
import xarray.plot
"""
return "import xarray.plot"

def timeraw_import_xarray_backends(self):
return """
from xarray.backends import list_engines
list_engines()
"""

def timeraw_import_xarray_only(self):
# import numpy and pandas in the setup stage
return "import xarray", "import numpy, pandas"
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ Internal Changes
encoding times to preserve existing behavior and prevent future errors when it
is eventually set to ``True`` by default in cftime (:pull:`7171`). By
`Spencer Clark <https://github.com/spencerkclark>`_.
- Improved import time by lazily importing backend modules, matplotlib, dask.array and flox. (:issue:`6726`, :pull:`7179`)
By `Michael Niklas <https://github.com/headtr1ck>`_.
- Emit a warning under the development version of pandas when we convert
non-nanosecond precision datetime or timedelta values to nanosecond precision.
This was required in the past, because pandas previously was not compatible
Expand Down
27 changes: 11 additions & 16 deletions xarray/backends/cfgrib_.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from ..core import indexing
from ..core.utils import Frozen, FrozenDict, close_on_error
from ..core.utils import Frozen, FrozenDict, close_on_error, module_available
from ..core.variable import Variable
from .common import (
BACKEND_ENTRYPOINTS,
Expand All @@ -18,20 +18,6 @@
from .locks import SerializableLock, ensure_lock
from .store import StoreBackendEntrypoint

try:
import cfgrib

has_cfgrib = True
except ModuleNotFoundError:
has_cfgrib = False
# cfgrib throws a RuntimeError if eccodes is not installed
except (ImportError, RuntimeError):
warnings.warn(
"Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. "
"Try `import cfgrib` to get the full error message"
)
has_cfgrib = False

# FIXME: Add a dedicated lock, even if ecCodes is supposed to be thread-safe
# in most circumstances. See:
# https://confluence.ecmwf.int/display/ECC/Frequently+Asked+Questions
Expand Down Expand Up @@ -61,6 +47,15 @@ class CfGribDataStore(AbstractDataStore):
"""

def __init__(self, filename, lock=None, **backend_kwargs):
try:
import cfgrib
# cfgrib throws a RuntimeError if eccodes is not installed
except (ImportError, RuntimeError) as err:
warnings.warn(
"Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. "
"Try `import cfgrib` to get the full error message"
)
raise err

if lock is None:
lock = ECCODES_LOCK
Expand Down Expand Up @@ -96,7 +91,7 @@ def get_encoding(self):


class CfgribfBackendEntrypoint(BackendEntrypoint):
available = has_cfgrib
available = module_available("cfgrib")

def guess_can_open(self, filename_or_obj):
try:
Expand Down
15 changes: 9 additions & 6 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,22 +376,25 @@ class BackendEntrypoint:
Attributes
----------

open_dataset_parameters : tuple, default None
available : bool, default: True
Indicate whether this backend is available given the installed packages.
The setting of this attribute is not mandatory.
open_dataset_parameters : tuple, default: None
A list of ``open_dataset`` method parameters.
The setting of this attribute is not mandatory.
description : str
description : str, default: ""
A short string describing the engine.
The setting of this attribute is not mandatory.
url : str
url : str, default: ""
A string with the URL to the backend's documentation.
The setting of this attribute is not mandatory.
"""

available: ClassVar[bool] = True

open_dataset_parameters: tuple | None = None
description: str = ""
url: str = ""
open_dataset_parameters: ClassVar[tuple | None] = None
description: ClassVar[str] = ""
url: ClassVar[str] = ""

def __repr__(self) -> str:
txt = f"<{type(self).__name__}>"
Expand Down
19 changes: 8 additions & 11 deletions xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ..core.utils import (
FrozenDict,
is_remote_uri,
module_available,
read_magic_number_from_file,
try_read_magic_number_from_file_or_path,
)
Expand All @@ -33,16 +34,6 @@
)
from .store import StoreBackendEntrypoint

try:
import h5netcdf

has_h5netcdf = True
except ImportError:
# Catch the base ImportError (not just ModuleNotFoundError) to cover cases
# where the import fails because of mismatched versions of C dependencies.
# This can happen when developers are making changes to them.
has_h5netcdf = False


class H5NetCDFArrayWrapper(BaseNetCDF4Array):
def get_array(self, needs_lock=True):
Expand Down Expand Up @@ -110,6 +101,7 @@ class H5NetCDFStore(WritableCFDataStore):
)

def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=False):
import h5netcdf

if isinstance(manager, (h5netcdf.File, h5netcdf.Group)):
if group is None:
Expand Down Expand Up @@ -147,6 +139,7 @@ def open(
phony_dims=None,
decode_vlen_strings=True,
):
import h5netcdf

if isinstance(filename, bytes):
raise ValueError(
Expand Down Expand Up @@ -237,12 +230,16 @@ def get_attrs(self):
return FrozenDict(_read_attributes(self.ds))

def get_dimensions(self):
import h5netcdf

if Version(h5netcdf.__version__) >= Version("0.14.0.dev0"):
return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items())
else:
return self.ds.dimensions

def get_encoding(self):
import h5netcdf

if Version(h5netcdf.__version__) >= Version("0.14.0.dev0"):
return {
"unlimited_dims": {
Expand Down Expand Up @@ -373,7 +370,7 @@ class H5netcdfBackendEntrypoint(BackendEntrypoint):
backends.ScipyBackendEntrypoint
"""

available = has_h5netcdf
available = module_available("h5netcdf")
description = (
"Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using h5netcdf in Xarray"
)
Expand Down
16 changes: 4 additions & 12 deletions xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
FrozenDict,
close_on_error,
is_remote_uri,
module_available,
try_read_magic_number_from_path,
)
from ..core.variable import Variable
Expand All @@ -31,17 +32,6 @@
from .netcdf3 import encode_nc3_attr_value, encode_nc3_variable
from .store import StoreBackendEntrypoint

try:
import netCDF4

has_netcdf4 = True
except ImportError:
# Catch the base ImportError (not just ModuleNotFoundError) to cover cases
# where the import fails because of mismatched versions of C dependencies.
# This can happen when developers are making changes to them.
has_netcdf4 = False


# This lookup table maps from dtype.byteorder to a readable endian
# string used by netCDF4.
_endian_lookup = {"=": "native", ">": "big", "<": "little", "|": "native"}
Expand Down Expand Up @@ -313,6 +303,7 @@ class NetCDF4DataStore(WritableCFDataStore):
def __init__(
self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False
):
import netCDF4

if isinstance(manager, netCDF4.Dataset):
if group is None:
Expand Down Expand Up @@ -349,6 +340,7 @@ def open(
lock_maker=None,
autoclose=False,
):
import netCDF4

if isinstance(filename, os.PathLike):
filename = os.fspath(filename)
Expand Down Expand Up @@ -537,7 +529,7 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint):
backends.ScipyBackendEntrypoint
"""

available = has_netcdf4
available = module_available("netCDF4")
description = (
"Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray"
)
Expand Down
13 changes: 3 additions & 10 deletions xarray/backends/pseudonetcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np

from ..core import indexing
from ..core.utils import Frozen, FrozenDict, close_on_error
from ..core.utils import Frozen, FrozenDict, close_on_error, module_available
from ..core.variable import Variable
from .common import (
BACKEND_ENTRYPOINTS,
Expand All @@ -16,14 +16,6 @@
from .locks import HDF5_LOCK, NETCDFC_LOCK, combine_locks, ensure_lock
from .store import StoreBackendEntrypoint

try:
from PseudoNetCDF import pncopen

has_pseudonetcdf = True
except ModuleNotFoundError:
has_pseudonetcdf = False


# PseudoNetCDF can invoke netCDF libraries internally
PNETCDF_LOCK = combine_locks([HDF5_LOCK, NETCDFC_LOCK])

Expand Down Expand Up @@ -56,6 +48,7 @@ class PseudoNetCDFDataStore(AbstractDataStore):

@classmethod
def open(cls, filename, lock=None, mode=None, **format_kwargs):
from PseudoNetCDF import pncopen

keywords = {"kwargs": format_kwargs}
# only include mode if explicitly passed
Expand Down Expand Up @@ -128,7 +121,7 @@ class PseudoNetCDFBackendEntrypoint(BackendEntrypoint):
backends.PseudoNetCDFDataStore
"""

available = has_pseudonetcdf
available = module_available("PseudoNetCDF")
description = (
"Open many atmospheric science data formats using PseudoNetCDF in Xarray"
)
Expand Down
24 changes: 12 additions & 12 deletions xarray/backends/pydap_.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@

from ..core import indexing
from ..core.pycompat import integer_types
from ..core.utils import Frozen, FrozenDict, close_on_error, is_dict_like, is_remote_uri
from ..core.utils import (
Frozen,
FrozenDict,
close_on_error,
is_dict_like,
is_remote_uri,
module_available,
)
from ..core.variable import Variable
from .common import (
BACKEND_ENTRYPOINTS,
Expand All @@ -16,15 +23,6 @@
)
from .store import StoreBackendEntrypoint

try:
import pydap.client
import pydap.lib

pydap_version = pydap.lib.__version__
has_pydap = True
except ModuleNotFoundError:
has_pydap = False


class PydapArrayWrapper(BackendArray):
def __init__(self, array):
Expand Down Expand Up @@ -101,6 +99,8 @@ def open(
verify=None,
user_charset=None,
):
import pydap.client
import pydap.lib

if timeout is None:
from pydap.lib import DEFAULT_TIMEOUT
Expand All @@ -114,7 +114,7 @@ def open(
"output_grid": output_grid or True,
"timeout": timeout,
}
if Version(pydap_version) >= Version("3.3.0"):
if Version(pydap.lib.__version__) >= Version("3.3.0"):
if verify is not None:
kwargs.update({"verify": verify})
if user_charset is not None:
Expand Down Expand Up @@ -154,7 +154,7 @@ class PydapBackendEntrypoint(BackendEntrypoint):
backends.PydapDataStore
"""

available = has_pydap
available = module_available("pydap")
description = "Open remote datasets via OPeNDAP using pydap in Xarray"
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.PydapBackendEntrypoint.html"

Expand Down
13 changes: 3 additions & 10 deletions xarray/backends/pynio_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np

from ..core import indexing
from ..core.utils import Frozen, FrozenDict, close_on_error
from ..core.utils import Frozen, FrozenDict, close_on_error, module_available
from ..core.variable import Variable
from .common import (
BACKEND_ENTRYPOINTS,
Expand All @@ -16,14 +16,6 @@
from .locks import HDF5_LOCK, NETCDFC_LOCK, SerializableLock, combine_locks, ensure_lock
from .store import StoreBackendEntrypoint

try:
import Nio

has_pynio = True
except ModuleNotFoundError:
has_pynio = False


# PyNIO can invoke netCDF libraries internally
# Add a dedicated lock just in case NCL as well isn't thread-safe.
NCL_LOCK = SerializableLock()
Expand Down Expand Up @@ -61,6 +53,7 @@ class NioDataStore(AbstractDataStore):
"""Store for accessing datasets via PyNIO"""

def __init__(self, filename, mode="r", lock=None, **kwargs):
import Nio

if lock is None:
lock = PYNIO_LOCK
Expand Down Expand Up @@ -101,7 +94,7 @@ def close(self):


class PynioBackendEntrypoint(BackendEntrypoint):
available = has_pynio
available = module_available("Nio")

def open_dataset(
self,
Expand Down
Loading