Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Buffer Prototype Argument #1910

Merged
merged 22 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/zarr/abc/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

from zarr.abc.metadata import Metadata
from zarr.abc.store import ByteGetter, ByteSetter
from zarr.array_spec import ArraySpec
from zarr.buffer import Buffer, NDBuffer
from zarr.common import concurrent_map
from zarr.common import SliceSelection, concurrent_map
from zarr.config import config

if TYPE_CHECKING:
from typing_extensions import Self

from zarr.common import ArraySpec, SliceSelection
from zarr.metadata import ArrayMetadata


Expand Down
17 changes: 12 additions & 5 deletions src/zarr/abc/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collections.abc import AsyncGenerator
from typing import Protocol, runtime_checkable

from zarr.buffer import Buffer
from zarr.buffer import Buffer, Prototype
from zarr.common import BytesLike, OpenMode


Expand Down Expand Up @@ -30,7 +30,10 @@ def _check_writable(self) -> None:

@abstractmethod
async def get(
self, key: str, byte_range: tuple[int | None, int | None] | None = None
self,
key: str,
prototype: Prototype,
byte_range: tuple[int | None, int | None] | None = None,
) -> Buffer | None:
"""Retrieve the value associated with a given key.

Expand All @@ -47,7 +50,7 @@ async def get(

@abstractmethod
async def get_partial_values(
self, key_ranges: list[tuple[str, tuple[int | None, int | None]]]
self, prototype: Prototype, key_ranges: list[tuple[str, tuple[int | None, int | None]]]
) -> list[Buffer | None]:
"""Retrieve possibly partial values from given key_ranges.

Expand Down Expand Up @@ -175,12 +178,16 @@ def close(self) -> None: # noqa: B027

@runtime_checkable
class ByteGetter(Protocol):
async def get(self, byte_range: tuple[int, int | None] | None = None) -> Buffer | None: ...
async def get(
self, prototype: Prototype, byte_range: tuple[int, int | None] | None = None
) -> Buffer | None: ...


@runtime_checkable
class ByteSetter(Protocol):
async def get(self, byte_range: tuple[int, int | None] | None = None) -> Buffer | None: ...
async def get(
self, prototype: Prototype, byte_range: tuple[int, int | None] | None = None
) -> Buffer | None: ...

async def set(self, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: ...

Expand Down
15 changes: 8 additions & 7 deletions src/zarr/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from zarr.abc.codec import Codec
from zarr.abc.store import set_or_delete
from zarr.attributes import Attributes
from zarr.buffer import Factory, NDArrayLike, NDBuffer
from zarr.buffer import NDArrayLike, Prototype, default_prototype
from zarr.chunk_grids import RegularChunkGrid
from zarr.chunk_key_encodings import ChunkKeyEncoding, DefaultChunkKeyEncoding, V2ChunkKeyEncoding
from zarr.codecs import BytesCodec
Expand Down Expand Up @@ -356,7 +356,7 @@ def attrs(self) -> dict[str, JSON]:
return self.metadata.attributes

async def getitem(
self, selection: Selection, *, factory: Factory.Create = NDBuffer.create
self, selection: Selection, *, prototype: Prototype = default_prototype
) -> NDArrayLike:
indexer = BasicIndexer(
selection,
Expand All @@ -365,7 +365,7 @@ async def getitem(
)

# setup output array
out = factory(
out = prototype.nd_buffer.create(
shape=indexer.shape,
dtype=self.metadata.dtype,
order=self.order,
Expand All @@ -377,7 +377,7 @@ async def getitem(
[
(
self.store_path / self.metadata.encode_chunk_key(chunk_coords),
self.metadata.get_chunk_spec(chunk_coords, self.order),
self.metadata.get_chunk_spec(chunk_coords, self.order, prototype),
chunk_selection,
out_selection,
)
Expand All @@ -396,7 +396,8 @@ async def setitem(
self,
selection: Selection,
value: NDArrayLike,
factory: Factory.NDArrayLike = NDBuffer.from_ndarray_like,
*,
prototype: Prototype = default_prototype,
) -> None:
indexer = BasicIndexer(
selection,
Expand All @@ -419,14 +420,14 @@ async def setitem(
# We accept any ndarray like object from the user and convert it
# to a NDBuffer (or subclass). From this point onwards, we only pass
# Buffer and NDBuffer between components.
value_buffer = factory(value)
value_buffer = prototype.nd_buffer.from_ndarray_like(value)

# merging with existing data and encoding chunks
await self.metadata.codec_pipeline.write(
[
(
self.store_path / self.metadata.encode_chunk_key(chunk_coords),
self.metadata.get_chunk_spec(chunk_coords, self.order),
self.metadata.get_chunk_spec(chunk_coords, self.order, prototype),
chunk_selection,
out_selection,
)
Expand Down
41 changes: 41 additions & 0 deletions src/zarr/array_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Literal

import numpy as np

from zarr.buffer import Prototype
from zarr.common import ChunkCoords, parse_dtype, parse_fill_value, parse_order, parse_shapelike


@dataclass(frozen=True)
class ArraySpec:
shape: ChunkCoords
dtype: np.dtype[Any]
fill_value: Any
order: Literal["C", "F"]
prototype: Prototype
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the Prototype pickleable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default_prototype, which is the one we use in ArrayV3Metadata, is pickleable.
AFAIK, it is not possible to express "pickleable" as a type hint, but I added the requirement to the docstring.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The thing that has to be pickleable is the class, not the object itself. I'm having a hard time imagining any scenario in which a class is not pickleable.


def __init__(
    self,
    shape: ChunkCoords,
    dtype: np.dtype[Any],
    fill_value: Any,
    order: Literal["C", "F"],
    prototype: Prototype,
) -> None:
    """Build the spec from parsed inputs.

    ``shape``, ``dtype``, ``fill_value`` and ``order`` are each passed through
    their ``parse_*`` helper before being stored. ``prototype`` is stored
    exactly as given, without going through a parser.
    """
    # All parsing happens while this mapping is built, so a failing parser
    # leaves the instance without any attribute set.
    parsed_fields = {
        "shape": parse_shapelike(shape),
        "dtype": parse_dtype(dtype),
        "fill_value": parse_fill_value(fill_value),
        "order": parse_order(order),
        "prototype": prototype,
    }

    # The dataclass is frozen, so normal attribute assignment is blocked;
    # go through object.__setattr__ to populate the fields.
    for field_name, field_value in parsed_fields.items():
        object.__setattr__(self, field_name, field_value)

@property
def ndim(self) -> int:
    """Return the number of dimensions, i.e. the number of entries in ``shape``."""
    dims = self.shape
    return len(dims)
Loading
Loading