
Commit 5ca080d

feat: metadata-only support for storage transformers metadata (#2180)
* feat: meager support for storage transformers metadata
* remove warning, and instead error when creating v3 arrays with storage transformers
* unbreak test fixture
1 parent 4515671 commit 5ca080d

File tree

4 files changed: +63 additions, -2 deletions

  src/zarr/core/array.py
  src/zarr/core/metadata/v3.py
  tests/v3/test_array.py
  tests/v3/test_metadata/test_v3.py

src/zarr/core/array.py

Lines changed: 8 additions & 1 deletion

@@ -82,7 +82,14 @@ def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata:
         return data
     elif isinstance(data, dict):
         if data["zarr_format"] == 3:
-            return ArrayV3Metadata.from_dict(data)
+            meta_out = ArrayV3Metadata.from_dict(data)
+            if len(meta_out.storage_transformers) > 0:
+                msg = (
+                    f"Array metadata contains storage transformers: {meta_out.storage_transformers}."
+                    "Arrays with storage transformers are not supported in zarr-python at this time."
+                )
+                raise ValueError(msg)
+            return meta_out
         elif data["zarr_format"] == 2:
             return ArrayV2Metadata.from_dict(data)
     raise TypeError
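
This guard runs after the metadata dict has been parsed, so it can be exercised directly. Below is a minimal illustrative sketch (not part of the commit): the metadata fields are copied from the test added in tests/v3/test_array.py further down, and the transformer entry is a made-up placeholder.

from zarr.codecs.bytes import BytesCodec
from zarr.core.array import parse_array_metadata

metadata_dict = {
    "zarr_format": 3,
    "node_type": "array",
    "shape": (10,),
    "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
    "data_type": "uint8",
    "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
    "codecs": (BytesCodec().to_dict(),),
    "fill_value": 0,
    # placeholder transformer: accepted by the metadata parser, rejected by the guard above
    "storage_transformers": ({"name": "example-transformer"},),
}

try:
    parse_array_metadata(metadata_dict)
except ValueError as err:
    print(err)  # "... not supported in zarr-python at this time."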

src/zarr/core/metadata/v3.py

Lines changed: 21 additions & 0 deletions

@@ -72,6 +72,23 @@ def parse_dimension_names(data: object) -> tuple[str | None, ...] | None:
     raise TypeError(msg)


+def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
+    """
+    Parse storage_transformers. Zarr python cannot use storage transformers
+    at this time, so this function doesn't attempt to validate them.
+    """
+    if data is None:
+        return ()
+    if isinstance(data, Iterable):
+        if len(tuple(data)) >= 1:
+            return data  # type: ignore[return-value]
+        else:
+            return ()
+    raise TypeError(
+        f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead."
+    )
+
+
 class V3JsonEncoder(json.JSONEncoder):
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.indent = kwargs.pop("indent", config.get("json_indent"))

@@ -144,6 +161,7 @@ class ArrayV3Metadata(ArrayMetadata):
     dimension_names: tuple[str, ...] | None = None
     zarr_format: Literal[3] = field(default=3, init=False)
     node_type: Literal["array"] = field(default="array", init=False)
+    storage_transformers: tuple[dict[str, JSON], ...]

     def __init__(
         self,

@@ -156,6 +174,7 @@ def __init__(
         codecs: Iterable[Codec | dict[str, JSON]],
         attributes: None | dict[str, JSON],
         dimension_names: None | Iterable[str],
+        storage_transformers: None | Iterable[dict[str, JSON]] = None,
     ) -> None:
         """
         Because the class is a frozen dataclass, we set attributes using object.__setattr__

@@ -168,6 +187,7 @@
         fill_value_parsed = parse_fill_value(fill_value, dtype=data_type_parsed)
         attributes_parsed = parse_attributes(attributes)
         codecs_parsed_partial = parse_codecs(codecs)
+        storage_transformers_parsed = parse_storage_transformers(storage_transformers)

         array_spec = ArraySpec(
             shape=shape_parsed,

@@ -186,6 +206,7 @@
         object.__setattr__(self, "dimension_names", dimension_names_parsed)
         object.__setattr__(self, "fill_value", fill_value_parsed)
         object.__setattr__(self, "attributes", attributes_parsed)
+        object.__setattr__(self, "storage_transformers", storage_transformers_parsed)

         self._validate_metadata()
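
For illustration (not part of the commit), a small sketch of how the new helper normalizes its input; the transformer dict here is a placeholder, since the helper deliberately skips validation:

from zarr.core.metadata.v3 import parse_storage_transformers

assert parse_storage_transformers(None) == ()  # missing -> empty tuple
assert parse_storage_transformers(()) == ()    # empty iterable -> empty tuple

placeholder = ({"name": "example-transformer"},)  # placeholder transformer dict
assert parse_storage_transformers(placeholder) == placeholder  # non-empty: passed through unvalidated

try:
    parse_storage_transformers(42)  # not an iterable
except TypeError as err:
    print(err)  # "Invalid storage_transformers. Expected an iterable of dicts. ..."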

tests/v3/test_array.py

Lines changed: 23 additions & 1 deletion

@@ -6,9 +6,10 @@
 import pytest

 from zarr import Array, AsyncArray, Group
+from zarr.codecs.bytes import BytesCodec
 from zarr.core.array import chunks_initialized
 from zarr.core.buffer.cpu import NDBuffer
-from zarr.core.common import ZarrFormat
+from zarr.core.common import JSON, ZarrFormat
 from zarr.core.indexing import ceildiv
 from zarr.core.sync import sync
 from zarr.errors import ContainsArrayError, ContainsGroupError

@@ -238,6 +239,27 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) ->
     np.testing.assert_array_equal(actual[:], expected[:])


+@pytest.mark.parametrize("store", ["memory"], indirect=True)
+def test_storage_transformers(store: MemoryStore) -> None:
+    """
+    Test that providing an actual storage transformer produces a warning and otherwise passes through
+    """
+    metadata_dict: dict[str, JSON] = {
+        "zarr_format": 3,
+        "node_type": "array",
+        "shape": (10,),
+        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
+        "data_type": "uint8",
+        "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
+        "codecs": (BytesCodec().to_dict(),),
+        "fill_value": 0,
+        "storage_transformers": ({"test": "should_raise"}),
+    }
+    match = "Arrays with storage transformers are not supported in zarr-python at this time."
+    with pytest.raises(ValueError, match=match):
+        Array.from_dict(StorePath(store), data=metadata_dict)
+
+
 @pytest.mark.parametrize("test_cls", [Array, AsyncArray])
 @pytest.mark.parametrize("nchunks", [2, 5, 10])
 def test_nchunks(test_cls: type[Array] | type[AsyncArray], nchunks: int) -> None:
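
As a counterpoint to the test above, metadata that declares no storage transformers (an empty tuple, or the key left out entirely) should still build an Array, because the guard in parse_array_metadata only fires for a non-empty tuple. A hypothetical companion test in the same style, reusing the file's existing fixtures and imports (the test name and body are illustrative, not part of the commit):

@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_empty_storage_transformers(store: MemoryStore) -> None:
    metadata_dict: dict[str, JSON] = {
        "zarr_format": 3,
        "node_type": "array",
        "shape": (10,),
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
        "data_type": "uint8",
        "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
        "codecs": (BytesCodec().to_dict(),),
        "fill_value": 0,
        "storage_transformers": (),  # or omit the key entirely
    }
    # no error expected: the parsed transformers tuple is empty
    arr = Array.from_dict(StorePath(store), data=metadata_dict)
    assert arr.metadata.storage_transformers == ()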

tests/v3/test_metadata/test_v3.py

Lines changed: 11 additions & 0 deletions

@@ -14,6 +14,7 @@
     from typing import Any

     from zarr.abc.codec import Codec
+    from zarr.core.common import JSON


 import numpy as np

@@ -196,6 +197,7 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
 @pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"])
 @pytest.mark.parametrize("dimension_separator", [".", "/", None])
 @pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"])
+@pytest.mark.parametrize("storage_transformers", [None, ()])
 def test_metadata_to_dict(
     chunk_grid: str,
     codecs: list[Codec],

@@ -204,6 +206,7 @@
     dimension_separator: Literal[".", "/"] | None,
     dimension_names: Literal["nones", "strings", "missing"],
     attributes: None | dict[str, Any],
+    storage_transformers: None | tuple[dict[str, JSON]],
 ) -> None:
     shape = (1, 2, 3)
     data_type = "uint8"

@@ -234,6 +237,7 @@
         "chunk_key_encoding": cke,
         "codecs": tuple(c.to_dict() for c in codecs),
         "fill_value": fill_value,
+        "storage_transformers": storage_transformers,
     }

     if attributes is not None:

@@ -244,9 +248,16 @@
     metadata = ArrayV3Metadata.from_dict(metadata_dict)
     observed = metadata.to_dict()
     expected = metadata_dict.copy()
+
+    # if unset or None or (), storage_transformers gets normalized to ()
+    assert observed["storage_transformers"] == ()
+    observed.pop("storage_transformers")
+    expected.pop("storage_transformers")
+
     if attributes is None:
         assert observed["attributes"] == {}
         observed.pop("attributes")
+
     if dimension_separator is None:
         if chunk_key_encoding == "default":
             expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict()
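
The normalization this test relies on can also be seen in a standalone sketch (illustrative, not part of the commit; the field values are reused from the metadata dict in tests/v3/test_array.py above):

from zarr.codecs.bytes import BytesCodec
from zarr.core.metadata.v3 import ArrayV3Metadata

meta = ArrayV3Metadata.from_dict(
    {
        "zarr_format": 3,
        "node_type": "array",
        "shape": (10,),
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
        "data_type": "uint8",
        "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
        "codecs": (BytesCodec().to_dict(),),
        "fill_value": 0,
        "storage_transformers": None,  # None, (), or a missing key all normalize to ()
    }
)
assert meta.storage_transformers == ()
assert meta.to_dict()["storage_transformers"] == ()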
