Skip to content

Update numcodecs tests and docs for zarr-python 3.0 #675

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ jobs:
uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0 # required for version resolution

- name: Set up Conda
uses: conda-incubator/setup-miniconda@v3.1.0
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ repos:
hooks:
- id: mypy
args: [--config-file, pyproject.toml]
additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3.0.0b2']
additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3.0.0rc1']
2 changes: 2 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ Fixes
~~~~~
* Fixes issue with ``Delta`` Zarr 3 codec not working with ``astype``.
By :user:`Norman Rzepka <normanrz>`, :issue:`664`
* Fixes issues with the upcoming ``zarr`` 3.0.0 release.
By :user:`Norman Rzepka <normanrz>`, :issue:`675`

* Removed Version Check: The previous code included a check for the ``NumPy`` version
and a warning if the version was incompatible with ``zfpy``.
Expand Down
43 changes: 23 additions & 20 deletions docs/zarr3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ Zarr 3 codecs
.. automodule:: numcodecs.zarr3


Bytes-to-bytes codecs
---------------------
Compressors (bytes-to-bytes codecs)
-----------------------------------
.. autoclass:: Blosc()

.. autoattribute:: codec_name
Expand Down Expand Up @@ -40,58 +40,61 @@ Bytes-to-bytes codecs
.. autoattribute:: codec_name


Array-to-array codecs
---------------------
.. autoclass:: Delta()
Checksum codecs (bytes-to-bytes codecs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Checksum codecs must be passed as ``compressors`` in zarr-python.

.. autoclass:: CRC32()

.. autoattribute:: codec_name

.. autoclass:: BitRound()
.. autoclass:: CRC32C()

.. autoattribute:: codec_name

.. autoclass:: FixedScaleOffset()
.. autoclass:: Adler32()

.. autoattribute:: codec_name

.. autoclass:: Quantize()
.. autoclass:: Fletcher32()

.. autoattribute:: codec_name

.. autoclass:: PackBits()
.. autoclass:: JenkinsLookup3()

.. autoattribute:: codec_name

.. autoclass:: AsType()

.. autoattribute:: codec_name
Filters (array-to-array codecs)
-------------------------------
.. autoclass:: Delta()

.. autoattribute:: codec_name

Bytes-to-bytes checksum codecs
------------------------------
.. autoclass:: CRC32()
.. autoclass:: BitRound()

.. autoattribute:: codec_name

.. autoclass:: CRC32C()
.. autoclass:: FixedScaleOffset()

.. autoattribute:: codec_name

.. autoclass:: Adler32()
.. autoclass:: Quantize()

.. autoattribute:: codec_name

.. autoclass:: Fletcher32()
.. autoclass:: PackBits()

.. autoattribute:: codec_name

.. autoclass:: JenkinsLookup3()
.. autoclass:: AsType()

.. autoattribute:: codec_name


Array-to-bytes codecs
---------------------

Serializers (array-to-bytes codecs)
-----------------------------------
.. autoclass:: PCodec()

.. autoattribute:: codec_name
Expand Down
89 changes: 48 additions & 41 deletions numcodecs/tests/test_zarr3.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,19 @@
numcodecs.zarr3.Shuffle,
],
)
def test_generic_codec_class(store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
def test_generic_compressor(
store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsBytesBytesCodec]
):
data = np.arange(0, 256, dtype="uint16").reshape((16, 16))

with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
a = Array.create(
a = zarr.create_array(
store / "generic",
shape=data.shape,
chunk_shape=(16, 16),
chunks=(16, 16),
dtype=data.dtype,
fill_value=0,
codecs=[BytesCodec(), codec_class()],
compressors=[codec_class()],
)

a[:, :] = data.copy()
Expand All @@ -100,62 +102,61 @@
)
def test_generic_filter(
store: StorePath,
codec_class: type[numcodecs.zarr3._NumcodecsCodec],
codec_class: type[numcodecs.zarr3._NumcodecsArrayArrayCodec],
codec_config: dict[str, JSON],
):
data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16))

with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
a = Array.create(
a = zarr.create_array(
store / "generic",
shape=data.shape,
chunk_shape=(16, 16),
chunks=(16, 16),
dtype=data.dtype,
fill_value=0,
codecs=[
filters=[
codec_class(**codec_config),
BytesCodec(),
],
)

a[:, :] = data.copy()
a = Array.open(store / "generic")
a = zarr.open_array(store / "generic", mode="r")
np.testing.assert_array_equal(data, a[:, :])


def test_generic_filter_bitround(store: StorePath):
data = np.linspace(0, 1, 256, dtype="float32").reshape((16, 16))

with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
a = Array.create(
a = zarr.create_array(
store / "generic_bitround",
shape=data.shape,
chunk_shape=(16, 16),
chunks=(16, 16),
dtype=data.dtype,
fill_value=0,
codecs=[numcodecs.zarr3.BitRound(keepbits=3), BytesCodec()],
filters=[numcodecs.zarr3.BitRound(keepbits=3)],
)

a[:, :] = data.copy()
a = Array.open(store / "generic_bitround")
a = zarr.open_array(store / "generic_bitround", mode="r")
assert np.allclose(data, a[:, :], atol=0.1)


def test_generic_filter_quantize(store: StorePath):
data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16))

with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
a = Array.create(
a = zarr.create_array(
store / "generic_quantize",
shape=data.shape,
chunk_shape=(16, 16),
chunks=(16, 16),
dtype=data.dtype,
fill_value=0,
codecs=[numcodecs.zarr3.Quantize(digits=3), BytesCodec()],
filters=[numcodecs.zarr3.Quantize(digits=3)],
)

a[:, :] = data.copy()
a = Array.open(store / "generic_quantize")
a = zarr.open_array(store / "generic_quantize", mode="r")
assert np.allclose(data, a[:, :], atol=0.001)


Expand All @@ -164,27 +165,27 @@
data[0:4, :] = True

with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
a = Array.create(
a = zarr.create_array(
store / "generic_packbits",
shape=data.shape,
chunk_shape=(16, 16),
chunks=(16, 16),
dtype=data.dtype,
fill_value=0,
codecs=[numcodecs.zarr3.PackBits(), BytesCodec()],
filters=[numcodecs.zarr3.PackBits()],
)

a[:, :] = data.copy()
a = Array.open(store / "generic_packbits")
a = zarr.open_array(store / "generic_packbits", mode="r")
np.testing.assert_array_equal(data, a[:, :])

with pytest.raises(ValueError, match=".*requires bool dtype.*"):
Array.create(
zarr.create_array(
store / "generic_packbits_err",
shape=data.shape,
chunk_shape=(16, 16),
chunks=(16, 16),
dtype="uint32",
fill_value=0,
codecs=[numcodecs.zarr3.PackBits(), BytesCodec()],
filters=[numcodecs.zarr3.PackBits()],
)


Expand All @@ -198,26 +199,30 @@
numcodecs.zarr3.JenkinsLookup3,
],
)
def test_generic_checksum(store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
def test_generic_checksum(
store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsBytesBytesCodec]
):
data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16))

with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
a = Array.create(
a = zarr.create_array(
store / "generic_checksum",
shape=data.shape,
chunk_shape=(16, 16),
chunks=(16, 16),
dtype=data.dtype,
fill_value=0,
codecs=[BytesCodec(), codec_class()],
compressors=[codec_class()],
)

a[:, :] = data.copy()
a = Array.open(store / "generic_checksum")
a = zarr.open_array(store / "generic_checksum", mode="r")
np.testing.assert_array_equal(data, a[:, :])


@pytest.mark.parametrize("codec_class", [numcodecs.zarr3.PCodec, numcodecs.zarr3.ZFPY])
def test_generic_bytes_codec(store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsCodec]):
def test_generic_bytes_codec(
store: StorePath, codec_class: type[numcodecs.zarr3._NumcodecsArrayBytesCodec]
):
try:
codec_class()._codec # noqa: B018
except ValueError as e:
Expand All @@ -231,15 +236,13 @@
data = np.arange(0, 256, dtype="float32").reshape((16, 16))

with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
a = Array.create(
a = zarr.create_array(

Check warning on line 239 in numcodecs/tests/test_zarr3.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/tests/test_zarr3.py#L239

Added line #L239 was not covered by tests
store / "generic",
shape=data.shape,
chunk_shape=(16, 16),
chunks=(16, 16),
dtype=data.dtype,
fill_value=0,
codecs=[
codec_class(),
],
serializer=codec_class(),
)

a[:, :] = data.copy()
Expand All @@ -250,18 +253,22 @@
data = np.linspace(0, 10, 256, dtype="i8").reshape((16, 16))

with pytest.warns(UserWarning, match=EXPECTED_WARNING_STR):
a = Array.create(
a = zarr.create_array(
store / "generic",
shape=data.shape,
chunk_shape=(16, 16),
chunks=(16, 16),
dtype=data.dtype,
fill_value=0,
codecs=[
filters=[
numcodecs.zarr3.Delta(dtype="i8", astype="i2"), # type: ignore[arg-type]
BytesCodec(),
],
)

a[:, :] = data.copy()
a = Array.open(store / "generic")
a = zarr.open_array(store / "generic", mode="r")
np.testing.assert_array_equal(data, a[:, :])


def test_repr():
    """Check that ``repr()`` of an LZ4 codec shows the class name, codec name and codec config."""
    expected = "LZ4(codec_name='numcodecs.lz4', codec_config={'level': 5})"
    lz4_codec = numcodecs.zarr3.LZ4(level=5)
    assert repr(lz4_codec) == expected
17 changes: 12 additions & 5 deletions numcodecs/zarr3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
>>> import zarr
>>> import numcodecs.zarr3
>>>
>>> codecs = [zarr.codecs.BytesCodec(), numcodecs.zarr3.BZ2(level=5)]
>>> array = zarr.open(
... "data.zarr", mode="w",
... shape=(1024, 1024), chunks=(64, 64),
>>> array = zarr.create_array(
... store="data.zarr",
... shape=(1024, 1024),
... chunks=(64, 64),
... dtype="uint32",
... codecs=codecs)
... filters=[numcodecs.zarr3.Delta()],
... compressors=[numcodecs.zarr3.BZ2(level=5)])
>>> array[:] = np.arange(*array.shape).astype(array.dtype)

.. note::
Expand Down Expand Up @@ -119,6 +120,12 @@ def to_dict(self) -> dict[str, JSON]:
def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int:
    """Not implemented for this codec; always raises ``NotImplementedError``.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    raise NotImplementedError  # pragma: no cover

# Explicit __repr__: the default one does not seem to work for the
# dynamically constructed codec classes.
def __repr__(self) -> str:
    """Return ``ClassName(codec_name=..., codec_config=...)``.

    The ``"id"`` entry, if present, is left out of the displayed config.
    ``self.codec_config`` itself is not modified; a copy is used.
    """
    shown_config = self.codec_config.copy()
    shown_config.pop("id", None)
    class_name = self.__class__.__name__
    return f"{class_name}(codec_name={self.codec_name!r}, codec_config={shown_config!r})"


class _NumcodecsBytesBytesCodec(_NumcodecsCodec, BytesBytesCodec):
def __init__(self, **codec_config: JSON) -> None:
Expand Down
Loading