Open
Description
Zarr version
3.0.8
Numcodecs version
0.16.1
Python Version
3.13
Operating System
Linux (WSL2)
Installation
pip
Description
When I try to create an array whose shard size is larger than its chunk size, I get the following error:
Stored and computed checksum do not match. Stored: b'[#\xe5?'. Computed: b'"\xa7w\x8a'.
If the shard size equals the chunk size, the error disappears.
Test cases:
- Shard == chunk:
  No error.
- Shard and chunk have the same size along one dimension (e.g. shards = (50, 200), chunks = (50, 100)):
  - With compressors=None: ~1% failure rate (1 failure in 100 runs)
  - With any non-None compressor (including the default): ~50% failure rate
- Shard spans multiple chunks along each axis:
  Almost 100% failure rate, even with compressors=None.
Sorry if this is a duplicate.
Steps to reproduce
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "zarr@git+https://github.com/zarr-developers/zarr-python.git@main",
# ]
# ///
#
# This script automatically imports the development branch of zarr to check for issues
import zarr
import numpy as np
# Minimal reproduction: write a sharded array where each shard spans several
# chunks along both axes, then read the whole array back.

# Full array shape: 10,000 rows x 2,000 columns.
N_ROWS, N_COLS = 10_000, 2_000
# Chunk shape passed as `chunks=` below.
CHUNK_ROW, CHUNK_COL = 1_000, 100
# Shard shape passed as `shards=` below: 5 chunks per shard along each axis
# (5_000 / 1_000 and 500 / 100), i.e. 25 chunks per shard.
SHARD_ROW, SHARD_COL = 5_000, 500
# Local store path; any existing content is replaced (overwrite=True below).
ARRAY_PATH = "/tmp/tmp.zarr"
# Fixed seed so the input data is identical on every run.
rng = np.random.default_rng(seed=42)
x = rng.random((N_ROWS, N_COLS))
# NOTE(review): per the traceback in this report, the checksum ValueError is
# raised from inside this create_array call (while writing `data`), not from
# the read-back below.
zarr.create_array(
store=ARRAY_PATH,
data=x,
chunks=(CHUNK_ROW, CHUNK_COL),
shards=(SHARD_ROW, SHARD_COL),
overwrite=True,
)
# Re-open the store and read the entire array back.
array = zarr.open_array(ARRAY_PATH)[:]
Additional output
Full traceback:
File "/home/rm/test_zarr/test_write.py", line 32, in <module>
zarr.create_array(
~~~~~~~~~~~~~~~~~^
store=ARRAY_PATH,
^^^^^^^^^^^^^^^^^
...<3 lines>...
overwrite=True,
^^^^^^^^^^^^^^^
)
^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/api/synchronous.py", line 888, in create_array
sync(
~~~~^
zarr.core.array.create_array(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
...<20 lines>...
)
^
)
^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/sync.py", line 163, in sync
raise return_result
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/sync.py", line 119, in _runner
return await coro
^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/array.py", line 4437, in create_array
return await from_array(
^^^^^^^^^^^^^^^^^
...<18 lines>...
)
^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/array.py", line 4077, in from_array
await concurrent_map(
...<3 lines>...
)
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/common.py", line 69, in concurrent_map
return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/common.py", line 67, in run
return await func(*item)
^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/array.py", line 4074, in _copy_arraylike_region
await result.setitem(chunk_coords, _data[chunk_coords])
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/array.py", line 1489, in setitem
return await self._set_selection(indexer, value, prototype=prototype)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/array.py", line 1428, in _set_selection
await self.codec_pipeline.write(
...<12 lines>...
)
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/codec_pipeline.py", line 479, in write
await concurrent_map(
...<6 lines>...
)
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/common.py", line 69, in concurrent_map
return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/common.py", line 67, in run
return await func(*item)
^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/codec_pipeline.py", line 345, in write_batch
await self.encode_partial_batch(
...<4 lines>...
)
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/codec_pipeline.py", line 242, in encode_partial_batch
await self.array_bytes_codec.encode_partial(batch_info)
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/abc/codec.py", line 235, in encode_partial
await concurrent_map(
...<3 lines>...
)
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/common.py", line 69, in concurrent_map
return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/common.py", line 67, in run
return await func(*item)
^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/codecs/sharding.py", line 589, in _encode_partial_single
await self._load_full_shard_maybe(
...<3 lines>...
)
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/codecs/sharding.py", line 738, in _load_full_shard_maybe
await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard)
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/codecs/sharding.py", line 194, in from_bytes
obj.index = await codec._decode_shard_index(shard_index_bytes, chunks_per_shard)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/codecs/sharding.py", line 640, in _decode_shard_index
await get_pipeline_class()
...<3 lines>...
)
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/codec_pipeline.py", line 446, in decode
output.extend(await self.decode_batch(batch_info))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/codec_pipeline.py", line 185, in decode_batch
chunk_bytes_batch = await bb_codec.decode(
^^^^^^^^^^^^^^^^^^^^^^
zip(chunk_bytes_batch, chunk_spec_batch, strict=False)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/abc/codec.py", line 129, in decode
return await _batching_helper(self._decode_single, chunks_and_specs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/abc/codec.py", line 407, in _batching_helper
return await concurrent_map(
^^^^^^^^^^^^^^^^^^^^^
...<3 lines>...
)
^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/common.py", line 69, in concurrent_map
return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/core/common.py", line 67, in run
return await func(*item)
^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/abc/codec.py", line 420, in wrap
return await func(chunk, chunk_spec)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rm/test_zarr/.venv/lib/python3.13/site-packages/zarr/codecs/crc32c_.py", line 46, in _decode_single
raise ValueError(
f"Stored and computed checksum do not match. Stored: {stored_checksum!r}. Computed: {computed_checksum!r}."
)
ValueError: Stored and computed checksum do not match. Stored: b'lT\xf1q'. Computed: b'\rQjM'.