Skip to content

add warnings when using non-spec features with v3 #2556

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ filterwarnings = [
"ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning",
"ignore:Creating a zarr.buffer.gpu.*:UserWarning",
"ignore:Duplicate name:UserWarning", # from ZipFile
"ignore:.*is currently not part in the Zarr version 3 specification.*:UserWarning",
]
markers = [
"gpu: mark a test as requiring CuPy and GPU"
Expand Down
9 changes: 9 additions & 0 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,14 @@ async def consolidate_metadata(
v = dataclasses.replace(v, consolidated_metadata=ConsolidatedMetadata(metadata={}))
members_metadata[k] = v

if any(m.zarr_format == 3 for m in members_metadata.values()):
warnings.warn(
"Consolidated metadata is currently not part in the Zarr version 3 specification. It "
"may not be supported by other zarr implementations and may change in the future.",
category=UserWarning,
stacklevel=1,
)

ConsolidatedMetadata._flat_to_nested(members_metadata)

consolidated_metadata = ConsolidatedMetadata(metadata=members_metadata)
Expand All @@ -203,6 +211,7 @@ async def consolidate_metadata(
group,
metadata=metadata,
)

await group._save_metadata()
return group

Expand Down
19 changes: 19 additions & 0 deletions src/zarr/codecs/vlen_utf8.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from dataclasses import dataclass
from typing import TYPE_CHECKING
from warnings import warn

import numpy as np
from numcodecs.vlen import VLenBytes, VLenUTF8
Expand All @@ -25,6 +26,15 @@

@dataclass(frozen=True)
class VLenUTF8Codec(ArrayBytesCodec):
def __init__(self) -> None:
    """Construct the codec and warn that `vlen-utf8` is outside the Zarr v3 spec.

    A ``UserWarning`` is issued through ``warn`` each time the constructor
    runs, before the parent initializer is invoked. ``stacklevel=2`` makes
    the warning point at the frame that called this constructor instead of
    at this method.
    """
    message = (
        "The codec `vlen-utf8` is currently not part in the Zarr version 3 specification. It "
        "may not be supported by other zarr implementations and may change in the future."
    )
    warn(message, category=UserWarning, stacklevel=2)
    super().__init__()

@classmethod
def from_dict(cls, data: dict[str, JSON]) -> Self:
_, configuration_parsed = parse_named_configuration(
Expand Down Expand Up @@ -71,6 +81,15 @@ def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -

@dataclass(frozen=True)
class VLenBytesCodec(ArrayBytesCodec):
def __init__(self) -> None:
    """Construct the codec and warn that `vlen-bytes` is outside the Zarr v3 spec.

    A ``UserWarning`` is issued through ``warn`` each time the constructor
    runs, before the parent initializer is invoked. ``stacklevel=2`` makes
    the warning point at the frame that called this constructor instead of
    at this method.
    """
    message = (
        "The codec `vlen-bytes` is currently not part in the Zarr version 3 specification. It "
        "may not be supported by other zarr implementations and may change in the future."
    )
    warn(message, category=UserWarning, stacklevel=2)
    super().__init__()

@classmethod
def from_dict(cls, data: dict[str, JSON]) -> Self:
_, configuration_parsed = parse_named_configuration(
Expand Down
13 changes: 11 additions & 2 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from itertools import starmap
from logging import getLogger
from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload
from warnings import warn

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -539,7 +540,7 @@ async def _create_v3(
store_path: StorePath,
*,
shape: ShapeLike,
dtype: npt.DTypeLike,
dtype: np.dtype[Any],
chunk_shape: ChunkCoords,
fill_value: Any | None = None,
order: MemoryOrder | None = None,
Expand Down Expand Up @@ -580,6 +581,14 @@ async def _create_v3(
else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1])
)

if dtype.kind in "UTS":
warn(
f"The dtype `{dtype}` is currently not part in the Zarr version 3 specification. It "
"may not be supported by other zarr implementations and may change in the future.",
category=UserWarning,
stacklevel=2,
)

metadata = ArrayV3Metadata(
shape=shape,
data_type=dtype,
Expand All @@ -601,7 +610,7 @@ async def _create_v2(
store_path: StorePath,
*,
shape: ChunkCoords,
dtype: npt.DTypeLike,
dtype: np.dtype[Any],
chunks: ChunkCoords,
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
Expand Down
10 changes: 5 additions & 5 deletions src/zarr/core/metadata/v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,14 @@ def validate_codecs(codecs: tuple[Codec, ...], dtype: DataType) -> None:

# we need to have special codecs if we are decoding vlen strings or bytestrings
# TODO: use codec ID instead of class name
codec_id = abc.__class__.__name__
if dtype == DataType.string and not codec_id == "VLenUTF8Codec":
codec_class_name = abc.__class__.__name__
if dtype == DataType.string and not codec_class_name == "VLenUTF8Codec":
raise ValueError(
f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_id}`."
f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`."
)
if dtype == DataType.bytes and not codec_id == "VLenBytesCodec":
if dtype == DataType.bytes and not codec_class_name == "VLenBytesCodec":
raise ValueError(
f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_id}`."
f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_class_name}`."
)


Expand Down
Loading