Skip to content

Commit 84d3284

Browse files
mpiannucci and d-v-b authored
Fix nan encoding in consolidated metadata (#2996)
* Fix nan encoding in consolidated metadata Co-authored-by: Davis Bennett <davis.v.bennett@gmail.com>
1 parent f9b5a3b commit 84d3284

File tree

3 files changed

+42
-4
lines changed

3 files changed

+42
-4
lines changed

changes/2996.bugfix.rst

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fixes `ConsolidatedMetadata` serialization of `nan`, `inf`, and `-inf` to be
2+
consistent with the behavior of `ArrayMetadata`.
3+
4+

src/zarr/core/group.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
)
5050
from zarr.core.config import config
5151
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
52-
from zarr.core.metadata.v3 import V3JsonEncoder
52+
from zarr.core.metadata.v3 import V3JsonEncoder, _replace_special_floats
5353
from zarr.core.sync import SyncMixin, sync
5454
from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataValidationError
5555
from zarr.storage import StoreLike, StorePath
@@ -334,7 +334,7 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
334334
if self.zarr_format == 3:
335335
return {
336336
ZARR_JSON: prototype.buffer.from_bytes(
337-
json.dumps(self.to_dict(), cls=V3JsonEncoder).encode()
337+
json.dumps(_replace_special_floats(self.to_dict()), cls=V3JsonEncoder).encode()
338338
)
339339
}
340340
else:
@@ -355,10 +355,10 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]:
355355
assert isinstance(consolidated_metadata, dict)
356356
for k, v in consolidated_metadata.items():
357357
attrs = v.pop("attributes", None)
358-
d[f"{k}/{ZATTRS_JSON}"] = attrs
358+
d[f"{k}/{ZATTRS_JSON}"] = _replace_special_floats(attrs)
359359
if "shape" in v:
360360
# it's an array
361-
d[f"{k}/{ZARRAY_JSON}"] = v
361+
d[f"{k}/{ZARRAY_JSON}"] = _replace_special_floats(v)
362362
else:
363363
d[f"{k}/{ZGROUP_JSON}"] = {
364364
"zarr_format": self.zarr_format,

tests/test_metadata/test_consolidated.py

+34
Original file line numberDiff line numberDiff line change
@@ -573,3 +573,37 @@ async def test_use_consolidated_false(
573573
assert len([x async for x in good.members()]) == 2
574574
assert good.metadata.consolidated_metadata
575575
assert sorted(good.metadata.consolidated_metadata.metadata) == ["a", "b"]
576+
577+
578+
@pytest.mark.parametrize("fill_value", [np.nan, np.inf, -np.inf])
async def test_consolidated_metadata_encodes_special_chars(
    memory_store: Store, zarr_format: ZarrFormat, fill_value: float
):
    """Check that consolidated metadata serializes NaN and +/-Infinity as strings.

    A root group is created with a child group whose ``test`` attribute is
    ``fill_value`` and a float64 array ``time`` whose fill value is
    ``fill_value``. After consolidating, the root is re-opened, its metadata is
    serialized with ``to_buffer_dict``, and the decoded JSON is expected to hold
    the string ``"NaN"``, ``"Infinity"`` or ``"-Infinity"`` in both places.
    """
    # Build the hierarchy and consolidate its metadata into the store.
    hierarchy = await group(store=memory_store, zarr_format=zarr_format)
    await hierarchy.create_group("child", attributes={"test": fill_value})
    await hierarchy.create_array(
        "time", shape=(12,), dtype=np.float64, fill_value=fill_value
    )
    await zarr.api.asynchronous.consolidate_metadata(memory_store)

    # Re-open so the serialized document is produced from the consolidated state.
    reopened = await group(store=memory_store, zarr_format=zarr_format)
    buffers = reopened.metadata.to_buffer_dict(default_buffer_prototype())

    # String form each special float is expected to take in the JSON output.
    if np.isnan(fill_value):
        expected = "NaN"
    elif np.isneginf(fill_value):
        expected = "-Infinity"
    elif np.isinf(fill_value):
        expected = "Infinity"

    if zarr_format == 2:
        # v2: entries live in ".zmetadata" under flat "<path>/<file>" keys.
        document = json.loads(buffers[".zmetadata"].to_bytes().decode("utf-8"))
        entries = document["metadata"]
        assert entries["child/.zattrs"]["test"] == expected
        assert entries["time/.zarray"]["fill_value"] == expected
    elif zarr_format == 3:
        # v3: entries are nested inside "zarr.json" and keyed by node path.
        document = json.loads(buffers["zarr.json"].to_bytes().decode("utf-8"))
        entries = document["consolidated_metadata"]["metadata"]
        assert entries["child"]["attributes"]["test"] == expected
        assert entries["time"]["fill_value"] == expected

0 commit comments

Comments
 (0)