Skip to content

Commit a31046c

Browse files
jhammand-v-bpre-commit-ci[bot]
authored
Feature: store learns to delete prefixes when overwriting/creating hierarchy nodes (#2430)
* fix(array): thread order parameter through to array __init__ * type fixes * move defaults * apply MemoryOrder type to ArrayV2Metadata * more more * more more more * feature(store,group,array): stores learn to delete prefixes when overwriting nodes - add Store.delete_dir and Store.delete_prefix - update array and group creation methods to call delete_dir - change list_prefix to return absolue keys * fixup * fixup * respond to review * fixup * fixup * Update src/zarr/abc/store.py * style: pre-commit fixes --------- Co-authored-by: Davis Bennett <davis.v.bennett@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 7bceb58 commit a31046c

File tree

14 files changed

+113
-37
lines changed

14 files changed

+113
-37
lines changed

src/zarr/abc/store.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,8 @@ def list(self) -> AsyncGenerator[str, None]:
342342
@abstractmethod
343343
def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
344344
"""
345-
Retrieve all keys in the store that begin with a given prefix. Keys are returned with the
346-
common leading prefix removed.
345+
Retrieve all keys in the store that begin with a given prefix. Keys are returned relative
346+
to the root of the store.
347347
348348
Parameters
349349
----------
@@ -371,6 +371,20 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
371371
"""
372372
...
373373

374+
async def delete_dir(self, prefix: str) -> None:
375+
"""
376+
Remove all keys and prefixes in the store that begin with a given prefix.
377+
"""
378+
if not self.supports_deletes:
379+
raise NotImplementedError
380+
if not self.supports_listing:
381+
raise NotImplementedError
382+
self._check_writable()
383+
if not prefix.endswith("/"):
384+
prefix += "/"
385+
async for key in self.list_prefix(prefix):
386+
await self.delete(key)
387+
374388
def close(self) -> None:
375389
"""Close the store."""
376390
self._is_open = False

src/zarr/core/array.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,12 @@ async def _create_v3(
553553
attributes: dict[str, JSON] | None = None,
554554
exists_ok: bool = False,
555555
) -> AsyncArray[ArrayV3Metadata]:
556-
if not exists_ok:
556+
if exists_ok:
557+
if store_path.store.supports_deletes:
558+
await store_path.delete_dir()
559+
else:
560+
await ensure_no_existing_node(store_path, zarr_format=3)
561+
else:
557562
await ensure_no_existing_node(store_path, zarr_format=3)
558563

559564
shape = parse_shapelike(shape)
@@ -605,7 +610,12 @@ async def _create_v2(
605610
attributes: dict[str, JSON] | None = None,
606611
exists_ok: bool = False,
607612
) -> AsyncArray[ArrayV2Metadata]:
608-
if not exists_ok:
613+
if exists_ok:
614+
if store_path.store.supports_deletes:
615+
await store_path.delete_dir()
616+
else:
617+
await ensure_no_existing_node(store_path, zarr_format=2)
618+
else:
609619
await ensure_no_existing_node(store_path, zarr_format=2)
610620

611621
if order is None:

src/zarr/core/group.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,13 @@ async def from_store(
404404
zarr_format: ZarrFormat = 3,
405405
) -> AsyncGroup:
406406
store_path = await make_store_path(store)
407-
if not exists_ok:
407+
408+
if exists_ok:
409+
if store_path.store.supports_deletes:
410+
await store_path.delete_dir()
411+
else:
412+
await ensure_no_existing_node(store_path, zarr_format=zarr_format)
413+
else:
408414
await ensure_no_existing_node(store_path, zarr_format=zarr_format)
409415
attributes = attributes or {}
410416
group = cls(
@@ -727,19 +733,8 @@ def _getitem_consolidated(
727733

728734
async def delitem(self, key: str) -> None:
729735
store_path = self.store_path / key
730-
if self.metadata.zarr_format == 3:
731-
await (store_path / ZARR_JSON).delete()
732-
733-
elif self.metadata.zarr_format == 2:
734-
await asyncio.gather(
735-
(store_path / ZGROUP_JSON).delete(), # TODO: missing_ok=False
736-
(store_path / ZARRAY_JSON).delete(), # TODO: missing_ok=False
737-
(store_path / ZATTRS_JSON).delete(), # TODO: missing_ok=True
738-
)
739-
740-
else:
741-
raise ValueError(f"unexpected zarr_format: {self.metadata.zarr_format}")
742736

737+
await store_path.delete_dir()
743738
if self.metadata.consolidated_metadata:
744739
self.metadata.consolidated_metadata.metadata.pop(key, None)
745740
await self._save_metadata()

src/zarr/storage/common.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,15 @@ async def delete(self) -> None:
101101
"""
102102
await self.store.delete(self.path)
103103

104+
async def delete_dir(self) -> None:
105+
"""
106+
Delete all keys with the given prefix from the store.
107+
"""
108+
path = self.path
109+
if not path.endswith("/"):
110+
path += "/"
111+
await self.store.delete_dir(path)
112+
104113
async def set_if_not_exists(self, default: Buffer) -> None:
105114
"""
106115
Store a key to ``value`` if the key is not already present.

src/zarr/storage/local.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,8 @@ async def list(self) -> AsyncGenerator[str, None]:
226226

227227
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
228228
# docstring inherited
229-
to_strip = (
230-
(self.root / prefix).as_posix() + "/"
231-
) # TODO: fixme in https://github.com/zarr-developers/zarr-python/issues/2438
229+
to_strip = self.root.as_posix() + "/"
230+
prefix = prefix.rstrip("/")
232231
for p in (self.root / prefix).rglob("*"):
233232
if p.is_file():
234233
yield p.as_posix().replace(to_strip, "")

src/zarr/storage/logging.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,11 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
222222
async for key in self._store.list_dir(prefix=prefix):
223223
yield key
224224

225+
async def delete_dir(self, prefix: str) -> None:
226+
# docstring inherited
227+
with self.log(prefix):
228+
await self._store.delete_dir(prefix=prefix)
229+
225230
def with_mode(self, mode: AccessModeLiteral) -> Self:
226231
# docstring inherited
227232
with self.log(mode):

src/zarr/storage/memory.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from logging import getLogger
34
from typing import TYPE_CHECKING, Self
45

56
from zarr.abc.store import ByteRangeRequest, Store
@@ -14,6 +15,9 @@
1415
from zarr.core.common import AccessModeLiteral
1516

1617

18+
logger = getLogger(__name__)
19+
20+
1721
class MemoryStore(Store):
1822
"""
1923
In-memory store for testing purposes.
@@ -137,7 +141,7 @@ async def delete(self, key: str) -> None:
137141
try:
138142
del self._store_dict[key]
139143
except KeyError:
140-
pass
144+
logger.debug("Key %s does not exist.", key)
141145

142146
async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None:
143147
# docstring inherited
@@ -150,9 +154,10 @@ async def list(self) -> AsyncGenerator[str, None]:
150154

151155
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
152156
# docstring inherited
153-
for key in self._store_dict:
157+
# note: we materialize all dict keys into a list here so we can mutate the dict in-place (e.g. in delete_prefix)
158+
for key in list(self._store_dict):
154159
if key.startswith(prefix):
155-
yield key.removeprefix(prefix)
160+
yield key
156161

157162
async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
158163
# docstring inherited

src/zarr/storage/remote.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
340340

341341
async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
342342
# docstring inherited
343-
find_str = f"{self.path}/{prefix}"
344-
for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False):
345-
yield onefile.removeprefix(find_str)
343+
for onefile in await self.fs._find(
344+
f"{self.path}/{prefix}", detail=False, maxdepth=None, withdirs=False
345+
):
346+
yield onefile.removeprefix(f"{self.path}/")

src/zarr/storage/zip.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
244244
# docstring inherited
245245
async for key in self.list():
246246
if key.startswith(prefix):
247-
yield key.removeprefix(prefix)
247+
yield key
248248

249249
async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
250250
# docstring inherited

src/zarr/testing/store.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,11 +213,26 @@ async def test_exists(self, store: S) -> None:
213213
assert await store.exists("foo/zarr.json")
214214

215215
async def test_delete(self, store: S) -> None:
216+
if not store.supports_deletes:
217+
pytest.skip("store does not support deletes")
216218
await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar"))
217219
assert await store.exists("foo/zarr.json")
218220
await store.delete("foo/zarr.json")
219221
assert not await store.exists("foo/zarr.json")
220222

223+
async def test_delete_dir(self, store: S) -> None:
224+
if not store.supports_deletes:
225+
pytest.skip("store does not support deletes")
226+
await store.set("zarr.json", self.buffer_cls.from_bytes(b"root"))
227+
await store.set("foo-bar/zarr.json", self.buffer_cls.from_bytes(b"root"))
228+
await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar"))
229+
await store.set("foo/c/0", self.buffer_cls.from_bytes(b"chunk"))
230+
await store.delete_dir("foo")
231+
assert await store.exists("zarr.json")
232+
assert await store.exists("foo-bar/zarr.json")
233+
assert not await store.exists("foo/zarr.json")
234+
assert not await store.exists("foo/c/0")
235+
221236
async def test_empty(self, store: S) -> None:
222237
assert await store.empty()
223238
await self.set(
@@ -249,8 +264,7 @@ async def test_list(self, store: S) -> None:
249264
async def test_list_prefix(self, store: S) -> None:
250265
"""
251266
Test that the `list_prefix` method works as intended. Given a prefix, it should return
252-
all the keys in storage that start with this prefix. Keys should be returned with the shared
253-
prefix removed.
267+
all the keys in storage that start with this prefix.
254268
"""
255269
prefixes = ("", "a/", "a/b/", "a/b/c/")
256270
data = self.buffer_cls.from_bytes(b"")
@@ -264,7 +278,7 @@ async def test_list_prefix(self, store: S) -> None:
264278
expected: tuple[str, ...] = ()
265279
for key in store_dict:
266280
if key.startswith(prefix):
267-
expected += (key.removeprefix(prefix),)
281+
expected += (key,)
268282
expected = tuple(sorted(expected))
269283
assert observed == expected
270284

0 commit comments

Comments
 (0)