|
5 | 5 | from itertools import starmap
|
6 | 6 | from typing import TYPE_CHECKING, Protocol, runtime_checkable
|
7 | 7 |
|
| 8 | +from zarr.core.buffer.core import default_buffer_prototype |
| 9 | +from zarr.core.common import concurrent_map |
| 10 | +from zarr.core.config import config |
| 11 | + |
8 | 12 | if TYPE_CHECKING:
|
9 | 13 | from collections.abc import AsyncGenerator, AsyncIterator, Iterable
|
10 | 14 | from types import TracebackType
|
@@ -344,6 +348,70 @@ async def _get_many(
|
344 | 348 | for req in requests:
|
345 | 349 | yield (req[0], await self.get(*req))
|
346 | 350 |
|
async def getsize(self, key: str) -> int:
    """
    Return the size, in bytes, of the value stored under ``key``.

    Parameters
    ----------
    key : str
        The key of the value to measure; it is passed unchanged to
        ``self.get``.

    Returns
    -------
    nbytes : int
        The size of the value (in bytes), taken as ``len()`` of the
        object returned by ``self.get``.

    Raises
    ------
    FileNotFoundError
        When ``self.get`` returns ``None`` for the given key, i.e. the
        key does not exist in the store.
    """
    # Note to implementers: this fallback is very inefficient because it
    # reads the entire object just to measure it. Many systems can report
    # the size of an object without reading it.
    prototype = default_buffer_prototype()
    stored = await self.get(key, prototype=prototype)
    if stored is not None:
        return len(stored)
    raise FileNotFoundError(key)
| 376 | + |
async def getsize_prefix(self, prefix: str) -> int:
    """
    Return the total size, in bytes, of all values under a prefix.

    Parameters
    ----------
    prefix : str
        The prefix of the directory to measure.

    Returns
    -------
    nbytes : int
        The sum of the sizes of the values in the directory (in bytes).

    See Also
    --------
    zarr.Array.nbytes_stored
    Store.getsize

    Notes
    -----
    ``getsize_prefix`` is just provided as a potentially faster alternative to
    listing all the keys under a prefix and calling :meth:`Store.getsize` on
    each.

    In general, ``prefix`` should be the path of an Array or Group in the Store.
    Implementations may differ on the behavior when some other ``prefix``
    is provided.
    """
    # TODO: Overlap listing keys with getsize calls.
    # Currently the complete list of keys is built in memory before any size
    # is requested. Ideally the two phases would overlap, which should
    # improve tail latency and might reduce memory pressure (since not all
    # keys would be in memory at once).

    # Each key is wrapped in a 1-tuple: ``concurrent_map`` receives the
    # arguments for every ``self.getsize`` call as a tuple.
    call_args = []
    async for name in self.list_prefix(prefix):
        call_args.append((name,))

    # The configured "async.concurrency" value is handed to
    # ``concurrent_map`` as its ``limit``.
    max_concurrency = config.get("async.concurrency")
    per_key_sizes = await concurrent_map(call_args, self.getsize, limit=max_concurrency)
    return sum(per_key_sizes)
| 414 | + |
347 | 415 |
|
348 | 416 | @runtime_checkable
|
349 | 417 | class ByteGetter(Protocol):
|
|
0 commit comments