Skip to content

Commit 29246d6

Browse files
d-v-bjhamman
andauthored
fix/normalize storage paths (#2384)
* bring in path normalization function from v2, and add a failing test * rephrase comment * simplify storepath creation * Update tests/v3/test_api.py Co-authored-by: Joe Hamman <joe@earthmover.io> * refactor: remove redundant zarr format fixture * replace assertion with an informative error message * fix incorrect path concatenation in make_store_path, and refactor store_path tests * remove upath import because we don't need it * apply suggestions from code review --------- Co-authored-by: Joe Hamman <joe@earthmover.io>
1 parent 1131253 commit 29246d6

File tree

8 files changed

+180
-65
lines changed

8 files changed

+180
-65
lines changed

src/zarr/api/asynchronous.py

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -170,10 +170,7 @@ async def consolidate_metadata(
170170
The group, with the ``consolidated_metadata`` field set to include
171171
the metadata of each child node.
172172
"""
173-
store_path = await make_store_path(store)
174-
175-
if path is not None:
176-
store_path = store_path / path
173+
store_path = await make_store_path(store, path=path)
177174

178175
group = await AsyncGroup.open(store_path, zarr_format=zarr_format, use_consolidated=False)
179176
group.store_path.store._check_writable()
@@ -291,10 +288,7 @@ async def open(
291288
"""
292289
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
293290

294-
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
295-
296-
if path is not None:
297-
store_path = store_path / path
291+
store_path = await make_store_path(store, mode=mode, path=path, storage_options=storage_options)
298292

299293
if "shape" not in kwargs and mode in {"a", "w", "w-"}:
300294
try:
@@ -401,9 +395,7 @@ async def save_array(
401395
)
402396

403397
mode = kwargs.pop("mode", None)
404-
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
405-
if path is not None:
406-
store_path = store_path / path
398+
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
407399
new = await AsyncArray.create(
408400
store_path,
409401
zarr_format=zarr_format,
@@ -582,9 +574,7 @@ async def group(
582574

583575
mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a")
584576

585-
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
586-
if path is not None:
587-
store_path = store_path / path
577+
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
588578

589579
if chunk_store is not None:
590580
warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2)
@@ -697,9 +687,7 @@ async def open_group(
697687
if chunk_store is not None:
698688
warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2)
699689

700-
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
701-
if path is not None:
702-
store_path = store_path / path
690+
store_path = await make_store_path(store, mode=mode, storage_options=storage_options, path=path)
703691

704692
if attributes is None:
705693
attributes = {}
@@ -883,9 +871,7 @@ async def create(
883871
if not isinstance(store, Store | StorePath):
884872
mode = "a"
885873

886-
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
887-
if path is not None:
888-
store_path = store_path / path
874+
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
889875

890876
return await AsyncArray.create(
891877
store_path,
@@ -925,6 +911,7 @@ async def empty(
925911
retrieve data from an empty Zarr array, any values may be returned,
926912
and these are not guaranteed to be stable from one access to the next.
927913
"""
914+
928915
return await create(shape=shape, fill_value=None, **kwargs)
929916

930917

@@ -1044,7 +1031,7 @@ async def open_array(
10441031
store: StoreLike | None = None,
10451032
zarr_version: ZarrFormat | None = None, # deprecated
10461033
zarr_format: ZarrFormat | None = None,
1047-
path: PathLike | None = None,
1034+
path: PathLike = "",
10481035
storage_options: dict[str, Any] | None = None,
10491036
**kwargs: Any, # TODO: type kwargs as valid args to save
10501037
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
@@ -1071,9 +1058,7 @@ async def open_array(
10711058
"""
10721059

10731060
mode = kwargs.pop("mode", None)
1074-
store_path = await make_store_path(store, mode=mode)
1075-
if path is not None:
1076-
store_path = store_path / path
1061+
store_path = await make_store_path(store, path=path, mode=mode)
10771062

10781063
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
10791064

src/zarr/storage/_utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,49 @@
11
from __future__ import annotations
22

3+
import re
4+
from pathlib import Path
35
from typing import TYPE_CHECKING
46

57
if TYPE_CHECKING:
68
from zarr.core.buffer import Buffer
79

810

11+
def normalize_path(path: str | bytes | Path | None) -> str:
12+
if path is None:
13+
result = ""
14+
elif isinstance(path, bytes):
15+
result = str(path, "ascii")
16+
17+
# handle pathlib.Path
18+
elif isinstance(path, Path):
19+
result = str(path)
20+
21+
elif isinstance(path, str):
22+
result = path
23+
24+
else:
25+
raise TypeError(f'Object {path} has an invalid type for "path": {type(path).__name__}')
26+
27+
# convert backslash to forward slash
28+
result = result.replace("\\", "/")
29+
30+
# remove leading and trailing slashes
31+
result = result.strip("/")
32+
33+
# collapse any repeated slashes
34+
pat = re.compile(r"//+")
35+
result = pat.sub("/", result)
36+
37+
# disallow path segments with just '.' or '..'
38+
segments = result.split("/")
39+
if any(s in {".", ".."} for s in segments):
40+
raise ValueError(
41+
f"The path {path!r} is invalid because its string representation contains '.' or '..' segments."
42+
)
43+
44+
return result
45+
46+
947
def _normalize_interval_index(
1048
data: Buffer, interval: None | tuple[int | None, int | None]
1149
) -> tuple[int, int]:

src/zarr/storage/common.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from zarr.core.buffer import Buffer, default_buffer_prototype
99
from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, ZarrFormat
1010
from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError
11+
from zarr.storage._utils import normalize_path
1112
from zarr.storage.local import LocalStore
1213
from zarr.storage.memory import MemoryStore
1314

@@ -41,9 +42,9 @@ class StorePath:
4142
store: Store
4243
path: str
4344

44-
def __init__(self, store: Store, path: str | None = None) -> None:
45+
def __init__(self, store: Store, path: str = "") -> None:
4546
self.store = store
46-
self.path = path or ""
47+
self.path = path
4748

4849
async def get(
4950
self,
@@ -159,6 +160,7 @@ def __eq__(self, other: object) -> bool:
159160
async def make_store_path(
160161
store_like: StoreLike | None,
161162
*,
163+
path: str | None = "",
162164
mode: AccessModeLiteral | None = None,
163165
storage_options: dict[str, Any] | None = None,
164166
) -> StorePath:
@@ -189,6 +191,9 @@ async def make_store_path(
189191
----------
190192
store_like : StoreLike | None
191193
The object to convert to a `StorePath` object.
194+
path: str | None, optional
195+
The path to use when creating the `StorePath` object. If None, the
196+
default path is the empty string.
192197
mode : AccessModeLiteral | None, optional
193198
The mode to use when creating the `StorePath` object. If None, the
194199
default mode is 'r'.
@@ -209,37 +214,44 @@ async def make_store_path(
209214
from zarr.storage.remote import RemoteStore # circular import
210215

211216
used_storage_options = False
212-
217+
path_normalized = normalize_path(path)
213218
if isinstance(store_like, StorePath):
214219
if mode is not None and mode != store_like.store.mode.str:
215220
_store = store_like.store.with_mode(mode)
216221
await _store._ensure_open()
217-
store_like = StorePath(_store)
218-
result = store_like
222+
store_like = StorePath(_store, path=store_like.path)
223+
result = store_like / path_normalized
219224
elif isinstance(store_like, Store):
220225
if mode is not None and mode != store_like.mode.str:
221226
store_like = store_like.with_mode(mode)
222227
await store_like._ensure_open()
223-
result = StorePath(store_like)
228+
result = StorePath(store_like, path=path_normalized)
224229
elif store_like is None:
225230
# mode = "w" is an exception to the default mode = 'r'
226-
result = StorePath(await MemoryStore.open(mode=mode or "w"))
231+
result = StorePath(await MemoryStore.open(mode=mode or "w"), path=path_normalized)
227232
elif isinstance(store_like, Path):
228-
result = StorePath(await LocalStore.open(root=store_like, mode=mode or "r"))
233+
result = StorePath(
234+
await LocalStore.open(root=store_like, mode=mode or "r"), path=path_normalized
235+
)
229236
elif isinstance(store_like, str):
230237
storage_options = storage_options or {}
231238

232239
if _is_fsspec_uri(store_like):
233240
used_storage_options = True
234241
result = StorePath(
235-
RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r")
242+
RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r"),
243+
path=path_normalized,
236244
)
237245
else:
238-
result = StorePath(await LocalStore.open(root=Path(store_like), mode=mode or "r"))
246+
result = StorePath(
247+
await LocalStore.open(root=Path(store_like), mode=mode or "r"), path=path_normalized
248+
)
239249
elif isinstance(store_like, dict):
240250
# We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings.
241251
# By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate.
242-
result = StorePath(await MemoryStore.open(store_dict=store_like, mode=mode or "r"))
252+
result = StorePath(
253+
await MemoryStore.open(store_dict=store_like, mode=mode or "r"), path=path_normalized
254+
)
243255
else:
244256
msg = f"Unsupported type for store_like: '{type(store_like).__name__}'" # type: ignore[unreachable]
245257
raise TypeError(msg)

src/zarr/storage/local.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,10 @@ def __init__(self, root: Path | str, *, mode: AccessModeLiteral = "r") -> None:
9797
super().__init__(mode=mode)
9898
if isinstance(root, str):
9999
root = Path(root)
100-
assert isinstance(root, Path)
100+
if not isinstance(root, Path):
101+
raise TypeError(
102+
f'"root" must be a string or Path instance. Got an object with type {type(root)} instead.'
103+
)
101104
self.root = root
102105

103106
async def _open(self) -> None:

tests/v3/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def array_fixture(request: pytest.FixtureRequest) -> npt.NDArray[Any]:
138138
)
139139

140140

141-
@pytest.fixture(params=(2, 3))
141+
@pytest.fixture(params=(2, 3), ids=["zarr2", "zarr3"])
142142
def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat:
143143
if request.param == 2:
144144
return 2

tests/v3/test_api.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pathlib
22
import warnings
3+
from typing import Literal
34

45
import numpy as np
56
import pytest
@@ -10,9 +11,19 @@
1011
import zarr.core.group
1112
from zarr import Array, Group
1213
from zarr.abc.store import Store
13-
from zarr.api.synchronous import create, group, load, open, open_group, save, save_array, save_group
14+
from zarr.api.synchronous import (
15+
create,
16+
group,
17+
load,
18+
open,
19+
open_group,
20+
save,
21+
save_array,
22+
save_group,
23+
)
1424
from zarr.core.common import ZarrFormat
1525
from zarr.errors import MetadataValidationError
26+
from zarr.storage._utils import normalize_path
1627
from zarr.storage.memory import MemoryStore
1728

1829

@@ -37,6 +48,20 @@ def test_create_array(memory_store: Store) -> None:
3748
assert z.chunks == (40,)
3849

3950

51+
@pytest.mark.parametrize("path", ["foo", "/", "/foo", "///foo/bar"])
52+
@pytest.mark.parametrize("node_type", ["array", "group"])
53+
def test_open_normalized_path(
54+
memory_store: MemoryStore, path: str, node_type: Literal["array", "group"]
55+
) -> None:
56+
node: Group | Array
57+
if node_type == "group":
58+
node = group(store=memory_store, path=path)
59+
elif node_type == "array":
60+
node = create(store=memory_store, path=path, shape=(2,))
61+
62+
assert node.path == normalize_path(path)
63+
64+
4065
async def test_open_array(memory_store: MemoryStore) -> None:
4166
store = memory_store
4267

tests/v3/test_group.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import contextlib
44
import pickle
55
import warnings
6-
from typing import TYPE_CHECKING, Any, Literal, cast
6+
from typing import TYPE_CHECKING, Any, Literal
77

88
import numpy as np
99
import pytest
@@ -14,7 +14,6 @@
1414
from zarr import Array, AsyncArray, AsyncGroup, Group
1515
from zarr.abc.store import Store
1616
from zarr.core.buffer import default_buffer_prototype
17-
from zarr.core.common import JSON, ZarrFormat
1817
from zarr.core.group import ConsolidatedMetadata, GroupMetadata
1918
from zarr.core.sync import sync
2019
from zarr.errors import ContainsArrayError, ContainsGroupError
@@ -26,6 +25,8 @@
2625
if TYPE_CHECKING:
2726
from _pytest.compat import LEGACY_PATH
2827

28+
from zarr.core.common import JSON, ZarrFormat
29+
2930

3031
@pytest.fixture(params=["local", "memory", "zip"])
3132
async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store:
@@ -43,14 +44,6 @@ def exists_ok(request: pytest.FixtureRequest) -> bool:
4344
return result
4445

4546

46-
@pytest.fixture(params=[2, 3], ids=["zarr2", "zarr3"])
47-
def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat:
48-
result = request.param
49-
if result not in (2, 3):
50-
raise ValueError("Wrong value returned from test fixture.")
51-
return cast(ZarrFormat, result)
52-
53-
5447
def test_group_init(store: Store, zarr_format: ZarrFormat) -> None:
5548
"""
5649
Test that initializing a group from an asyncgroup works.
@@ -587,6 +580,7 @@ def test_group_array_creation(
587580
assert empty_array.fill_value == 0
588581
assert empty_array.shape == shape
589582
assert empty_array.store_path.store == store
583+
assert empty_array.store_path.path == "empty"
590584

591585
empty_like_array = group.empty_like(name="empty_like", data=empty_array)
592586
assert isinstance(empty_like_array, Array)

0 commit comments

Comments
 (0)