Closed
Description
Zarr version
3.0.1
Numcodecs version
0.15.0
Python Version
3.13
Operating System
Linux
Installation
Using pip into virtual environment
Description
Writing the same data using identical compressor settings in Zarr-Python 2.x and 3.x yields differences in compression results.
Using `zarr==2.18.4`, `numcodecs==0.13.1`, this produces two chunks, each of size 1.7 KiB on disk. The metadata is:
{
"attributes": {},
"chunk_grid": {
"chunk_shape": [
64,
64,
2
],
"separator": "/",
"type": "regular"
},
"chunk_memory_layout": "C",
"compressor": {
"codec": "https://purl.org/zarr/spec/codec/blosc/1.0",
"configuration": {
"blocksize": 0,
"clevel": 1,
"cname": "lz4",
"shuffle": 1
}
},
"data_type": "<f8",
"extensions": [],
"fill_value": 0.0,
"shape": [
64,
64,
4
]
}
Using `zarr==3.0.1`, `numcodecs==0.15.0`, this produces two chunks, each of size 30 KiB on disk. The metadata is:
{
"shape": [
64,
64,
4
],
"data_type": "float64",
"chunk_grid": {
"name": "regular",
"configuration": {
"chunk_shape": [
64,
64,
2
]
}
},
"chunk_key_encoding": {
"name": "default",
"configuration": {
"separator": "/"
}
},
"fill_value": 0.0,
"codecs": [
{
"name": "bytes",
"configuration": {
"endian": "little"
}
},
{
"name": "blosc",
"configuration": {
"typesize": 8,
"cname": "lz4",
"clevel": 1,
"shuffle": "shuffle",
"blocksize": 0
}
}
],
"attributes": {},
"zarr_format": 3,
"node_type": "array",
"storage_transformers": []
}
Steps to reproduce
MVCE
Run the following code in both a 2.x and a 3.x environment and inspect the contents of `/tmp/foo.zarr`.
import numpy as np
import zarr
# Reproduction parameters shared by the zarr 2.x and zarr 3.x code paths.
store = "/tmp/foo.zarr"
shape = (64, 64, 4)
chunks = (64, 64, 2)  # half of the last axis per chunk -> two chunks in total
dtype = np.float64
cname = "lz4"
clevel = 1

# Select the array-creation API from the major version of the installed zarr.
if zarr.__version__[0] == "2":
    # zarr 2.x: configure the compressor through numcodecs and request the
    # v3 on-disk format via ``zarr_version=3``.
    import numcodecs.blosc

    compressor = numcodecs.Blosc(
        cname=cname,
        clevel=clevel,
        shuffle=numcodecs.blosc.SHUFFLE,
    )
    za = zarr.open(
        store,
        mode="w",
        zarr_version=3,
        shape=shape,
        chunks=chunks,
        dtype=dtype,
        compressor=compressor,
    )
else:
    # zarr 3.x: same codec name, level and shuffle mode, expressed with the
    # built-in ``BloscCodec`` and passed via ``compressors``.
    import zarr.codecs

    compressors = zarr.codecs.BloscCodec(
        cname=cname,
        clevel=clevel,
        shuffle=zarr.codecs.BloscShuffle.shuffle,
    )
    za = zarr.create_array(
        store,
        shape=shape,
        chunks=chunks,
        dtype=dtype,
        zarr_format=3,
        compressors=compressors,
    )

# Write a 0..N-1 ramp (built by np.arange, reshaped to ``shape``) into the
# whole array so both environments store identical values.
arr = np.arange(np.prod(shape)).reshape(shape)
za[:] = arr
Additional output
No response