Skip to content

Commit 554285b

Browse files
Illviljan and headtr1ck
authored
Move absolute path finder from open_mfdataset to own function (#7968)
* Move absolute path finder to own function
* Update common.py
* Workaround for strange \ behavior
* More workarounds
* Update common.py
* Update common.py
* Update common.py
* Update xarray/backends/common.py
  Co-authored-by: Michael Niklas <mick.niklas@gmail.com>
* Update common.py
---------
Co-authored-by: Michael Niklas <mick.niklas@gmail.com>
1 parent 17c9e8f commit 554285b

File tree

2 files changed

+85
-33
lines changed

2 files changed

+85
-33
lines changed

xarray/backends/api.py

Lines changed: 7 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import os
44
from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence
55
from functools import partial
6-
from glob import glob
76
from io import BytesIO
87
from numbers import Number
98
from typing import (
@@ -21,7 +20,12 @@
2120

2221
from xarray import backends, conventions
2322
from xarray.backends import plugins
24-
from xarray.backends.common import AbstractDataStore, ArrayWriter, _normalize_path
23+
from xarray.backends.common import (
24+
AbstractDataStore,
25+
ArrayWriter,
26+
_find_absolute_paths,
27+
_normalize_path,
28+
)
2529
from xarray.backends.locks import _get_scheduler
2630
from xarray.core import indexing
2731
from xarray.core.combine import (
@@ -967,37 +971,7 @@ def open_mfdataset(
967971
.. [1] https://docs.xarray.dev/en/stable/dask.html
968972
.. [2] https://docs.xarray.dev/en/stable/dask.html#chunking-and-performance
969973
"""
970-
if isinstance(paths, str):
971-
if is_remote_uri(paths) and engine == "zarr":
972-
try:
973-
from fsspec.core import get_fs_token_paths
974-
except ImportError as e:
975-
raise ImportError(
976-
"The use of remote URLs for opening zarr requires the package fsspec"
977-
) from e
978-
979-
fs, _, _ = get_fs_token_paths(
980-
paths,
981-
mode="rb",
982-
storage_options=kwargs.get("backend_kwargs", {}).get(
983-
"storage_options", {}
984-
),
985-
expand=False,
986-
)
987-
tmp_paths = fs.glob(fs._strip_protocol(paths)) # finds directories
988-
paths = [fs.get_mapper(path) for path in tmp_paths]
989-
elif is_remote_uri(paths):
990-
raise ValueError(
991-
"cannot do wild-card matching for paths that are remote URLs "
992-
f"unless engine='zarr' is specified. Got paths: {paths}. "
993-
"Instead, supply paths as an explicit list of strings."
994-
)
995-
else:
996-
paths = sorted(glob(_normalize_path(paths)))
997-
elif isinstance(paths, os.PathLike):
998-
paths = [os.fspath(paths)]
999-
else:
1000-
paths = [os.fspath(p) if isinstance(p, os.PathLike) else p for p in paths]
974+
paths = _find_absolute_paths(paths, engine=engine, **kwargs)
1001975

1002976
if not paths:
1003977
raise OSError("no files to open")

xarray/backends/common.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import time
66
import traceback
77
from collections.abc import Iterable
8+
from glob import glob
89
from typing import TYPE_CHECKING, Any, ClassVar
910

1011
import numpy as np
@@ -19,6 +20,7 @@
1920
from io import BufferedIOBase
2021

2122
from xarray.core.dataset import Dataset
23+
from xarray.core.types import NestedSequence
2224

2325
# Create a logger object, but don't add any handlers. Leave that to user code.
2426
logger = logging.getLogger(__name__)
@@ -28,6 +30,24 @@
2830

2931

3032
def _normalize_path(path):
33+
"""
34+
Normalize pathlikes to string.
35+
36+
Parameters
37+
----------
38+
path :
39+
Path to file.
40+
41+
Examples
42+
--------
43+
>>> from pathlib import Path
44+
45+
>>> directory = Path(xr.backends.common.__file__).parent
46+
>>> paths_path = Path(directory).joinpath("comm*n.py")
47+
>>> paths_str = xr.backends.common._normalize_path(paths_path)
48+
>>> print([type(p) for p in (paths_str,)])
49+
[<class 'str'>]
50+
"""
3151
if isinstance(path, os.PathLike):
3252
path = os.fspath(path)
3353

@@ -37,6 +57,64 @@ def _normalize_path(path):
3757
return path
3858

3959

60+
def _find_absolute_paths(
    paths: str | os.PathLike | NestedSequence[str | os.PathLike], **kwargs
) -> list[str]:
    """
    Resolve a path, wildcard pattern, or sequence of paths into a list of paths.

    Parameters
    ----------
    paths :
        Path(s) to file(s). A single string may include wildcards like * ;
        only a single string is glob-expanded. A single path-like or a
        sequence of paths is returned as a list without any expansion.
    **kwargs :
        Extra kwargs. Mainly for fsspec. Only ``engine`` and
        ``backend_kwargs["storage_options"]`` are read here; everything else
        is ignored.

    Returns
    -------
    list
        For a local string pattern: the sorted glob matches. For a remote
        URL with ``engine="zarr"``: one ``fs.get_mapper(...)`` object per
        match (not a string). For a path-like: a one-element list with its
        ``os.fspath`` value. For any other iterable: its elements in order,
        with path-like elements converted via ``os.fspath`` (only the top
        level is converted; nested sequences are passed through as-is).

    Raises
    ------
    ImportError
        If a remote URL is given with ``engine="zarr"`` but fsspec cannot be
        imported.
    ValueError
        If a remote URL string is given and ``engine`` is not ``"zarr"``.

    Examples
    --------
    >>> from pathlib import Path

    >>> directory = Path(xr.backends.common.__file__).parent
    >>> paths = str(Path(directory).joinpath("comm*n.py"))  # Find common with wildcard
    >>> paths = xr.backends.common._find_absolute_paths(paths)
    >>> [Path(p).name for p in paths]
    ['common.py']
    """
    if isinstance(paths, str):
        if is_remote_uri(paths) and kwargs.get("engine") == "zarr":
            try:
                from fsspec.core import get_fs_token_paths
            except ImportError as e:
                raise ImportError(
                    "The use of remote URLs for opening zarr requires the package fsspec"
                ) from e

            # ``backend_kwargs`` may be passed explicitly as None; a plain
            # ``kwargs.get("backend_kwargs", {})`` would then return None and
            # the chained ``.get`` would raise AttributeError.
            backend_kwargs = kwargs.get("backend_kwargs") or {}
            fs, _, _ = get_fs_token_paths(
                paths,
                mode="rb",
                storage_options=backend_kwargs.get("storage_options", {}),
                expand=False,
            )
            tmp_paths = fs.glob(fs._strip_protocol(paths))  # finds directories
            return [fs.get_mapper(path) for path in tmp_paths]

        if is_remote_uri(paths):
            raise ValueError(
                "cannot do wild-card matching for paths that are remote URLs "
                f"unless engine='zarr' is specified. Got paths: {paths}. "
                "Instead, supply paths as an explicit list of strings."
            )

        # Local pattern: sorted so the result order is deterministic.
        return sorted(glob(_normalize_path(paths)))

    if isinstance(paths, os.PathLike):
        return [os.fspath(paths)]

    # Explicit sequence of paths: no globbing, just normalise path-likes.
    return [os.fspath(p) if isinstance(p, os.PathLike) else p for p in paths]
116+
117+
40118
def _encode_variable_name(name):
41119
if name is None:
42120
name = NONE_VAR_NAME

0 commit comments

Comments
 (0)