Skip to content

Commit f13e954

Browse files
Ignore extra keys in v2 metadata (#2297)
* Ignore extra keys in v2 metadata. Ignore unexpected keys in Zarr V2 metadata to enable reading Zarr files written by other systems, which might store additional data in the top level of the `.zgroup` and `.zarray` files. Closes #2296
1 parent b1c4e47 commit f13e954

File tree

5 files changed

+61
-2
lines changed

5 files changed

+61
-2
lines changed

src/zarr/core/group.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import asyncio
44
import json
55
import logging
6-
from dataclasses import asdict, dataclass, field, replace
6+
from dataclasses import asdict, dataclass, field, fields, replace
77
from typing import TYPE_CHECKING, Literal, cast, overload
88

99
import numpy as np
@@ -116,6 +116,15 @@ def __init__(
116116
@classmethod
117117
def from_dict(cls, data: dict[str, Any]) -> GroupMetadata:
118118
assert data.pop("node_type", None) in ("group", None)
119+
120+
zarr_format = data.get("zarr_format")
121+
if zarr_format == 2 or zarr_format is None:
122+
# zarr v2 allowed arbitrary keys here.
123+
# We don't want the GroupMetadata constructor to fail just because someone put an
124+
# extra key in the metadata.
125+
expected = {x.name for x in fields(cls)}
126+
data = {k: v for k, v in data.items() if k in expected}
127+
119128
return cls(**data)
120129

121130
def to_dict(self) -> dict[str, Any]:

src/zarr/core/metadata/v2.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from zarr.core.common import JSON, ChunkCoords
1414

1515
import json
16-
from dataclasses import dataclass, field, replace
16+
from dataclasses import dataclass, field, fields, replace
1717

1818
import numcodecs
1919
import numpy as np
@@ -140,6 +140,17 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
140140
_data = data.copy()
141141
# check that the zarr_format attribute is correct
142142
_ = parse_zarr_format(_data.pop("zarr_format"))
143+
144+
# zarr v2 allowed arbitrary keys here.
145+
# We don't want the ArrayV2Metadata constructor to fail just because someone put an
146+
# extra key in the metadata.
147+
expected = {x.name for x in fields(cls)}
148+
# https://github.com/zarr-developers/zarr-python/issues/2269
149+
# handle the renames
150+
expected |= {"dtype", "chunks"}
151+
152+
_data = {k: v for k, v in _data.items() if k in expected}
153+
143154
return cls(**_data)
144155

145156
def to_dict(self) -> dict[str, JSON]:

tests/v3/test_array.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pytest
77

88
import zarr.api.asynchronous
9+
import zarr.storage
910
from zarr import Array, AsyncArray, Group
1011
from zarr.codecs.bytes import BytesCodec
1112
from zarr.core.array import chunks_initialized

tests/v3/test_group.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,3 +963,15 @@ async def test_open_mutable_mapping():
963963
def test_open_mutable_mapping_sync():
964964
group = zarr.open_group(store={}, mode="w")
965965
assert isinstance(group.store_path.store, MemoryStore)
966+
967+
968+
class TestGroupMetadata:
969+
def test_from_dict_extra_fields(self):
970+
data = {
971+
"attributes": {"key": "value"},
972+
"_nczarr_superblock": {"version": "2.0.0"},
973+
"zarr_format": 2,
974+
}
975+
result = GroupMetadata.from_dict(data)
976+
expected = GroupMetadata(attributes={"key": "value"}, zarr_format=2)
977+
assert result == expected

tests/v3/test_metadata/test_v2.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,29 @@ def test_metadata_to_dict(
7272
observed.pop("dimension_separator")
7373

7474
assert observed == expected
75+
76+
77+
def test_from_dict_extra_fields() -> None:
78+
data = {
79+
"_nczarr_array": {"dimrefs": ["/dim1", "/dim2"], "storage": "chunked"},
80+
"attributes": {"key": "value"},
81+
"chunks": [8],
82+
"compressor": None,
83+
"dtype": "<f8",
84+
"fill_value": 0.0,
85+
"filters": None,
86+
"order": "C",
87+
"shape": [8],
88+
"zarr_format": 2,
89+
}
90+
91+
result = ArrayV2Metadata.from_dict(data)
92+
expected = ArrayV2Metadata(
93+
attributes={"key": "value"},
94+
shape=(8,),
95+
dtype="float64",
96+
chunks=(8,),
97+
fill_value=0.0,
98+
order="C",
99+
)
100+
assert result == expected

0 commit comments

Comments
 (0)