Skip to content

Commit 260cfbc

Browse files
committed
Merge branch 'main' into fix/v2meta_fill_serialization
* main: don't serialize empty tuples for v2 filters, and warn when reading such metadata (zarr-developers#2847)
2 parents 6301b15 + 96c9677 commit 260cfbc

File tree

4 files changed

+58
-28
lines changed

4 files changed

+58
-28
lines changed

changes/2847.fix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed a bug where ``ArrayV2Metadata`` could save ``filters`` as an empty array.

src/zarr/core/metadata/v2.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import base64
4+
import warnings
45
from collections.abc import Iterable
56
from enum import Enum
67
from functools import cached_property
@@ -189,6 +190,17 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata:
189190
# Filter the keys to only those expected by the constructor.
190191
expected = {x.name for x in fields(cls)}
191192
expected |= {"dtype", "chunks"}
193+
194+
# check if `filters` is an empty sequence; if so use None instead and raise a warning
195+
if _data["filters"] is not None and len(_data["filters"]) == 0:
196+
msg = (
197+
"Found an empty list of filters in the array metadata document. "
198+
"This is contrary to the Zarr V2 specification, and will cause an error in the future. "
199+
"Use None (or Null in a JSON document) instead of an empty list of filters."
200+
)
201+
warnings.warn(msg, UserWarning, stacklevel=1)
202+
_data["filters"] = None
203+
192204
_data = {k: v for k, v in _data.items() if k in expected}
193205

194206
return cls(**_data)
@@ -284,7 +296,11 @@ def parse_filters(data: object) -> tuple[numcodecs.abc.Codec, ...] | None:
284296
else:
285297
msg = f"Invalid filter at index {idx}. Expected a numcodecs.abc.Codec or a dict representation of numcodecs.abc.Codec. Got {type(val)} instead."
286298
raise TypeError(msg)
287-
return tuple(out)
299+
if len(out) == 0:
300+
# Per the v2 spec, an empty tuple is not allowed -- use None to express "no filters"
301+
return None
302+
else:
303+
return tuple(out)
288304
# take a single codec instance and wrap it in a tuple
289305
if isinstance(data, numcodecs.abc.Codec):
290306
return (data,)

tests/test_array.py

Lines changed: 21 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -972,45 +972,40 @@ def test_default_fill_value(dtype: str, fill_value_expected: object, store: Stor
972972
@staticmethod
973973
@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
974974
@pytest.mark.parametrize("empty_value", [None, ()])
975-
async def test_no_filters_compressors(store: MemoryStore, dtype: str, empty_value: Any) -> None:
975+
async def test_no_filters_compressors(
976+
store: MemoryStore, dtype: str, empty_value: object, zarr_format: ZarrFormat
977+
) -> None:
976978
"""
977979
Test that the default ``filters`` and ``compressors`` are removed when ``create_array`` is invoked.
978980
"""
979981

980-
# v2
981982
arr = await create_array(
982983
store=store,
983984
dtype=dtype,
984985
shape=(10,),
985-
zarr_format=2,
986+
zarr_format=zarr_format,
986987
compressors=empty_value,
987988
filters=empty_value,
988989
)
989990
# Test metadata explicitly
990-
assert arr.metadata.zarr_format == 2 # guard for mypy
991-
# The v2 metadata stores None and () separately
992-
assert arr.metadata.filters == empty_value
993-
# The v2 metadata does not allow tuple for compressor, therefore it is turned into None
994-
assert arr.metadata.compressor is None
995-
996-
assert arr.filters == ()
997-
assert arr.compressors == ()
998-
999-
# v3
1000-
arr = await create_array(
1001-
store=store,
1002-
dtype=dtype,
1003-
shape=(10,),
1004-
compressors=empty_value,
1005-
filters=empty_value,
1006-
)
1007-
assert arr.metadata.zarr_format == 3 # guard for mypy
1008-
if dtype == "str":
1009-
assert arr.metadata.codecs == (VLenUTF8Codec(),)
1010-
assert arr.serializer == VLenUTF8Codec()
991+
if zarr_format == 2:
992+
assert arr.metadata.zarr_format == 2 # guard for mypy
993+
# v2 spec requires that filters be either a collection with at least one filter, or None
994+
assert arr.metadata.filters is None
995+
# Compressor is a single element in v2 metadata; the absence of a compressor is encoded
996+
# as None
997+
assert arr.metadata.compressor is None
998+
999+
assert arr.filters == ()
1000+
assert arr.compressors == ()
10111001
else:
1012-
assert arr.metadata.codecs == (BytesCodec(),)
1013-
assert arr.serializer == BytesCodec()
1002+
assert arr.metadata.zarr_format == 3 # guard for mypy
1003+
if dtype == "str":
1004+
assert arr.metadata.codecs == (VLenUTF8Codec(),)
1005+
assert arr.serializer == VLenUTF8Codec()
1006+
else:
1007+
assert arr.metadata.codecs == (BytesCodec(),)
1008+
assert arr.serializer == BytesCodec()
10141009

10151010
@staticmethod
10161011
@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])

tests/test_metadata/test_v2.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def test_parse_zarr_format_invalid(data: Any) -> None:
3333

3434

3535
@pytest.mark.parametrize("attributes", [None, {"foo": "bar"}])
36-
@pytest.mark.parametrize("filters", [None, (), (numcodecs.GZip(),)])
36+
@pytest.mark.parametrize("filters", [None, (numcodecs.GZip(),)])
3737
@pytest.mark.parametrize("compressor", [None, numcodecs.GZip()])
3838
@pytest.mark.parametrize("fill_value", [None, 0, 1])
3939
@pytest.mark.parametrize("order", ["C", "F"])
@@ -81,6 +81,24 @@ def test_metadata_to_dict(
8181
assert observed == expected
8282

8383

84+
def test_filters_empty_tuple_warns() -> None:
85+
metadata_dict = {
86+
"zarr_format": 2,
87+
"shape": (1,),
88+
"chunks": (1,),
89+
"dtype": "uint8",
90+
"order": "C",
91+
"compressor": None,
92+
"filters": (),
93+
"fill_value": 0,
94+
}
95+
with pytest.warns(
96+
UserWarning, match="Found an empty list of filters in the array metadata document."
97+
):
98+
meta = ArrayV2Metadata.from_dict(metadata_dict)
99+
assert meta.filters is None
100+
101+
84102
class TestConsolidated:
85103
@pytest.fixture
86104
async def v2_consolidated_metadata(

0 commit comments

Comments
 (0)