Skip to content

Commit

Permalink
Added Store.getsize
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Oct 21, 2024
1 parent 329612e commit 5e0ffe8
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 1 deletion.
28 changes: 28 additions & 0 deletions src/zarr/abc/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from itertools import starmap
from typing import TYPE_CHECKING, NamedTuple, Protocol, runtime_checkable

from zarr.core.buffer.core import default_buffer_prototype

if TYPE_CHECKING:
from collections.abc import AsyncGenerator, Iterable
from types import TracebackType
Expand Down Expand Up @@ -386,6 +388,32 @@ async def _get_many(
for req in requests:
yield (req[0], await self.get(*req))

async def getsize(self, key: str) -> int:
    """
    Report how many bytes are stored under ``key``.

    Parameters
    ----------
    key : str
        Key of the value to measure.

    Returns
    -------
    nbytes : int
        The size of the value in bytes.

    Raises
    ------
    FileNotFoundError
        If the store has no value for ``key``.
    """
    # Implementer note: this generic fallback fetches the whole value just to
    # measure it, which is wasteful. Stores that can look up an object's size
    # without reading it should override this method.
    stored = await self.get(key, prototype=default_buffer_prototype())
    if stored is not None:
        return len(stored)
    raise FileNotFoundError(key)


@runtime_checkable
class ByteGetter(Protocol):
Expand Down
3 changes: 3 additions & 0 deletions src/zarr/storage/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,6 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
yield str(key).replace(to_strip, "")
except (FileNotFoundError, NotADirectoryError):
pass

async def getsize(self, key: str) -> int:
    """Return the size, in bytes, of the path ``key`` resolved against ``self.root``."""
    target = self.root / key
    return os.path.getsize(target)
15 changes: 14 additions & 1 deletion src/zarr/storage/remote.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Self
from typing import TYPE_CHECKING, Any, Self, cast

import fsspec

Expand Down Expand Up @@ -301,3 +301,16 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
find_str = f"{self.path}/{prefix}"
for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False):
yield onefile.removeprefix(find_str)

async def getsize(self, key: str) -> int:
    """
    Return the size, in bytes, of the object stored under ``key``.

    The size is taken from the ``"size"`` entry of the filesystem's info
    record for the dereferenced path. When that entry is absent or ``None``,
    the parent class implementation is used instead.
    """
    full_path = _dereference_path(self.path, key)
    metadata = await self.fs._info(full_path)
    nbytes = metadata.get("size")

    if nbytes is not None:
        # fsspec is untyped, so we have to trust that this really is an int.
        return cast(int, nbytes)

    # Some filesystems do not report a size. Fall back to the parent
    # implementation, which reads the entire object.
    return await super().getsize(key)
12 changes: 12 additions & 0 deletions src/zarr/testing/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,3 +338,15 @@ async def test_set_if_not_exists(self, store: S) -> None:

result = await store.get("k2", default_buffer_prototype())
assert result == new

async def test_getsize(self, store: S) -> None:
    """``getsize`` reports the byte length of a value that was just written."""
    key = "k"
    payload = self.buffer_cls.from_bytes(b"0" * 10)
    await self.set(store, key, payload)

    nbytes = await store.getsize(key)
    assert nbytes == 10

async def test_getsize_raises(self, store: S) -> None:
    """``getsize`` raises ``FileNotFoundError`` for a key this test never wrote."""
    missing_key = "not-a-real-key"
    with pytest.raises(FileNotFoundError):
        await store.getsize(missing_key)

0 comments on commit 5e0ffe8

Please sign in to comment.