Skip to content

Commit 0e18671

Browse files
authored
Handle missing attributes key from metadata, and other fixes (#2058)
* Handle missing attributes key from metadata, allow null dimension_names elements, ensure that collections in metadata are tuples
* correct assignment of fixture value to test condition
* add v2 metadata dict roundtrip test
* alter test to expose failed handling of missing separator for v2 chunk key encoding
* fix up chunk key encoding from dict
1 parent cbc0887 commit 0e18671

File tree

6 files changed

+158
-19
lines changed

6 files changed

+158
-19
lines changed

src/zarr/abc/metadata.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ def to_dict(self) -> JSON:
3030
elif isinstance(value, str):
3131
out_dict[key] = value
3232
elif isinstance(value, Sequence):
33-
out_dict[key] = [v.to_dict() if isinstance(v, Metadata) else v for v in value]
33+
out_dict[key] = tuple(v.to_dict() if isinstance(v, Metadata) else v for v in value)
3434
else:
3535
out_dict[key] = value
3636

src/zarr/chunk_grids.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ def _from_dict(cls, data: dict[str, JSON]) -> Self:
131131
return cls(**configuration_parsed) # type: ignore[arg-type]
132132

133133
def to_dict(self) -> dict[str, JSON]:
134-
return {"name": "regular", "configuration": {"chunk_shape": list(self.chunk_shape)}}
134+
return {"name": "regular", "configuration": {"chunk_shape": tuple(self.chunk_shape)}}
135135

136136
def all_chunk_coords(self, array_shape: ChunkCoords) -> Iterator[ChunkCoords]:
137137
return itertools.product(

src/zarr/chunk_key_encodings.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -38,12 +38,20 @@ def from_dict(cls, data: dict[str, JSON] | ChunkKeyEncoding) -> ChunkKeyEncoding
3838
if isinstance(data, ChunkKeyEncoding):
3939
return data
4040

41-
name_parsed, configuration_parsed = parse_named_configuration(data)
41+
# configuration is optional for chunk key encodings
42+
name_parsed, config_parsed = parse_named_configuration(data, require_configuration=False)
4243
if name_parsed == "default":
43-
return DefaultChunkKeyEncoding(**configuration_parsed) # type: ignore[arg-type]
44+
if config_parsed is None:
45+
# for default, normalize missing configuration to use the "/" separator.
46+
config_parsed = {"separator": "/"}
47+
return DefaultChunkKeyEncoding(**config_parsed) # type: ignore[arg-type]
4448
if name_parsed == "v2":
45-
return V2ChunkKeyEncoding(**configuration_parsed) # type: ignore[arg-type]
46-
raise ValueError(f"Unknown chunk key encoding. Got {name_parsed}.")
49+
if config_parsed is None:
50+
# for v2, normalize missing configuration to use the "." separator.
51+
config_parsed = {"separator": "."}
52+
return V2ChunkKeyEncoding(**config_parsed) # type: ignore[arg-type]
53+
msg = f"Unknown chunk key encoding. Got {name_parsed}, expected one of ('v2', 'default')."
54+
raise ValueError(msg)
4755

4856
def to_dict(self) -> dict[str, JSON]:
4957
return {"name": self.name, "configuration": {"separator": self.separator}}

src/zarr/metadata.py

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -283,15 +283,19 @@ def _json_convert(o: Any) -> Any:
283283

284284
@classmethod
285285
def from_dict(cls, data: dict[str, JSON]) -> ArrayV3Metadata:
286+
# make a copy because we are modifying the dict
287+
_data = data.copy()
286288
# TODO: Remove the type: ignores[] comments below and use a TypedDict to type `data`
287289
# check that the zarr_format attribute is correct
288-
_ = parse_zarr_format_v3(data.pop("zarr_format")) # type: ignore[arg-type]
290+
_ = parse_zarr_format_v3(_data.pop("zarr_format")) # type: ignore[arg-type]
289291
# check that the node_type attribute is correct
290-
_ = parse_node_type_array(data.pop("node_type")) # type: ignore[arg-type]
292+
_ = parse_node_type_array(_data.pop("node_type")) # type: ignore[arg-type]
291293

292-
data["dimension_names"] = data.pop("dimension_names", None)
293-
294-
return cls(**data) # type: ignore[arg-type]
294+
# dimension_names key is optional, normalize missing to `None`
295+
_data["dimension_names"] = _data.pop("dimension_names", None)
296+
# attributes key is optional, normalize missing to `None`
297+
_data["attributes"] = _data.pop("attributes", None)
298+
return cls(**_data) # type: ignore[arg-type]
295299

296300
def to_dict(self) -> dict[str, Any]:
297301
out_dict = super().to_dict()
@@ -407,9 +411,11 @@ def _json_convert(
407411

408412
@classmethod
409413
def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
414+
# make a copy to protect the original from modification
415+
_data = data.copy()
410416
# check that the zarr_format attribute is correct
411-
_ = parse_zarr_format_v2(data.pop("zarr_format"))
412-
return cls(**data)
417+
_ = parse_zarr_format_v2(_data.pop("zarr_format"))
418+
return cls(**_data)
413419

414420
def to_dict(self) -> JSON:
415421
zarray_dict = super().to_dict()
@@ -446,10 +452,10 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self:
446452
return replace(self, attributes=attributes)
447453

448454

449-
def parse_dimension_names(data: None | Iterable[str]) -> tuple[str, ...] | None:
455+
def parse_dimension_names(data: None | Iterable[str | None]) -> tuple[str | None, ...] | None:
450456
if data is None:
451457
return data
452-
elif all(isinstance(x, str) for x in data):
458+
elif all(isinstance(x, type(None) | str) for x in data):
453459
return tuple(data)
454460
else:
455461
msg = f"Expected either None or a iterable of str, got {type(data)}"

tests/v3/test_metadata/test_v2.py

Lines changed: 54 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,16 @@
11
from __future__ import annotations
22

3-
from typing import TYPE_CHECKING
3+
from typing import TYPE_CHECKING, Literal
4+
5+
from zarr.abc.codec import Codec
46

57
if TYPE_CHECKING:
68
from typing import Any
79

810
import pytest
911

10-
from zarr.metadata import parse_zarr_format_v2
12+
from zarr.codecs import GzipCodec
13+
from zarr.metadata import ArrayV2Metadata, parse_zarr_format_v2
1114

1215

1316
def test_parse_zarr_format_valid() -> None:
@@ -18,3 +21,52 @@ def test_parse_zarr_format_valid() -> None:
1821
def test_parse_zarr_format_invalid(data: Any) -> None:
1922
with pytest.raises(ValueError, match=f"Invalid value. Expected 2. Got {data}"):
2023
parse_zarr_format_v2(data)
24+
25+
26+
@pytest.mark.parametrize("attributes", [None, {"foo": "bar"}])
27+
@pytest.mark.parametrize("filters", [(), (GzipCodec().to_dict())])
28+
@pytest.mark.parametrize("compressor", [None, GzipCodec().to_dict()])
29+
@pytest.mark.parametrize("fill_value", [0, 1])
30+
@pytest.mark.parametrize("order", ["C", "F"])
31+
@pytest.mark.parametrize("dimension_separator", [".", "/", None])
32+
def test_metadata_to_dict(
33+
compressor: Codec | None,
34+
filters: list[Codec] | None,
35+
fill_value: Any,
36+
order: Literal["C", "F"],
37+
dimension_separator: Literal[".", "/"] | None,
38+
attributes: None | dict[str, Any],
39+
) -> None:
40+
shape = (1, 2, 3)
41+
chunks = (1,) * len(shape)
42+
data_type = "|u1"
43+
metadata_dict = {
44+
"zarr_format": 2,
45+
"shape": shape,
46+
"chunks": chunks,
47+
"dtype": data_type,
48+
"order": order,
49+
"compressor": compressor,
50+
"filters": filters,
51+
"fill_value": fill_value,
52+
}
53+
54+
if attributes is not None:
55+
metadata_dict["attributes"] = attributes
56+
if dimension_separator is not None:
57+
metadata_dict["dimension_separator"] = dimension_separator
58+
59+
metadata = ArrayV2Metadata.from_dict(metadata_dict)
60+
observed = metadata.to_dict()
61+
expected = metadata_dict.copy()
62+
63+
if attributes is None:
64+
assert observed["attributes"] == {}
65+
observed.pop("attributes")
66+
67+
if dimension_separator is None:
68+
expected_dimension_sep = "."
69+
assert observed["dimension_separator"] == expected_dimension_sep
70+
observed.pop("dimension_separator")
71+
72+
assert observed == expected

tests/v3/test_metadata/test_v3.py

Lines changed: 75 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,11 @@
11
from __future__ import annotations
22

33
import re
4-
from typing import TYPE_CHECKING
4+
from typing import TYPE_CHECKING, Literal
5+
6+
from zarr.abc.codec import Codec
7+
from zarr.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
8+
from zarr.codecs.bytes import BytesCodec
59

610
if TYPE_CHECKING:
711
from typing import Any
@@ -11,7 +15,7 @@
1115
import numpy as np
1216
import pytest
1317

14-
from zarr.metadata import parse_dimension_names
18+
from zarr.metadata import ArrayV3Metadata, parse_dimension_names
1519
from zarr.metadata import parse_fill_value_v3 as parse_fill_value
1620
from zarr.metadata import parse_zarr_format_v3 as parse_zarr_format
1721

@@ -157,3 +161,72 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
157161
match = f"Cannot parse non-string sequence {fill_value} as a scalar with type {dtype}"
158162
with pytest.raises(TypeError, match=re.escape(match)):
159163
parse_fill_value(fill_value, dtype)
164+
165+
166+
@pytest.mark.parametrize("chunk_grid", ["regular"])
167+
@pytest.mark.parametrize("attributes", [None, {"foo": "bar"}])
168+
@pytest.mark.parametrize("codecs", [[BytesCodec()]])
169+
@pytest.mark.parametrize("fill_value", [0, 1])
170+
@pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"])
171+
@pytest.mark.parametrize("dimension_separator", [".", "/", None])
172+
@pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"])
173+
def test_metadata_to_dict(
174+
chunk_grid: str,
175+
codecs: list[Codec],
176+
fill_value: Any,
177+
chunk_key_encoding: Literal["v2", "default"],
178+
dimension_separator: Literal[".", "/"] | None,
179+
dimension_names: Literal["nones", "strings", "missing"],
180+
attributes: None | dict[str, Any],
181+
) -> None:
182+
shape = (1, 2, 3)
183+
data_type = "uint8"
184+
if chunk_grid == "regular":
185+
cgrid = {"name": "regular", "configuration": {"chunk_shape": (1, 1, 1)}}
186+
187+
cke: dict[str, Any]
188+
cke_name_dict = {"name": chunk_key_encoding}
189+
if dimension_separator is not None:
190+
cke = cke_name_dict | {"configuration": {"separator": dimension_separator}}
191+
else:
192+
cke = cke_name_dict
193+
dnames: tuple[str | None, ...] | None
194+
195+
if dimension_names == "strings":
196+
dnames = tuple(map(str, range(len(shape))))
197+
elif dimension_names == "missing":
198+
dnames = None
199+
elif dimension_names == "nones":
200+
dnames = (None,) * len(shape)
201+
202+
metadata_dict = {
203+
"zarr_format": 3,
204+
"node_type": "array",
205+
"shape": shape,
206+
"chunk_grid": cgrid,
207+
"data_type": data_type,
208+
"chunk_key_encoding": cke,
209+
"codecs": tuple(c.to_dict() for c in codecs),
210+
"fill_value": fill_value,
211+
}
212+
213+
if attributes is not None:
214+
metadata_dict["attributes"] = attributes
215+
if dnames is not None:
216+
metadata_dict["dimension_names"] = dnames
217+
218+
metadata = ArrayV3Metadata.from_dict(metadata_dict)
219+
observed = metadata.to_dict()
220+
expected = metadata_dict.copy()
221+
if attributes is None:
222+
assert observed["attributes"] == {}
223+
observed.pop("attributes")
224+
if dimension_separator is None:
225+
if chunk_key_encoding == "default":
226+
expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict()
227+
else:
228+
expected_cke_dict = V2ChunkKeyEncoding(separator=".").to_dict()
229+
assert observed["chunk_key_encoding"] == expected_cke_dict
230+
observed.pop("chunk_key_encoding")
231+
expected.pop("chunk_key_encoding")
232+
assert observed == expected

0 commit comments

Comments
 (0)