110
110
_parse_array_array_codec ,
111
111
_parse_array_bytes_codec ,
112
112
_parse_bytes_bytes_codec ,
113
- _resolve_codec ,
114
113
get_pipeline_class ,
115
114
)
116
115
from zarr .storage import StoreLike , make_store_path
@@ -469,7 +468,8 @@ async def create(
469
468
- For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``.
470
469
- For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``.
471
470
472
- These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
471
+ These defaults can be changed by modifying the value of ``array.v3_default_filters``,
472
+ ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`.
473
473
dimension_names : Iterable[str], optional
474
474
The names of the dimensions (default is None).
475
475
Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
@@ -1715,7 +1715,8 @@ def create(
1715
1715
- For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``.
1716
1716
- For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``.
1717
1717
1718
- These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
1718
+ These defaults can be changed by modifying the value of ``array.v3_default_filters``,
1719
+ ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`.
1719
1720
dimension_names : Iterable[str], optional
1720
1721
The names of the dimensions (default is None).
1721
1722
Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
@@ -3698,17 +3699,9 @@ def _build_parents(
3698
3699
3699
3700
def _get_default_codecs (
3700
3701
np_dtype : np .dtype [Any ],
3701
- ) -> list [dict [str , JSON ]]:
3702
- default_codecs = zarr_config .get ("array.v3_default_codecs" )
3703
- dtype = DataType .from_numpy (np_dtype )
3704
- if dtype == DataType .string :
3705
- dtype_key = "string"
3706
- elif dtype == DataType .bytes :
3707
- dtype_key = "bytes"
3708
- else :
3709
- dtype_key = "numeric"
3710
-
3711
- return cast (list [dict [str , JSON ]], default_codecs [dtype_key ])
3702
+ ) -> tuple [Codec , ...]:
3703
+ filters , serializer , compressors = _get_default_chunk_encoding_v3 (np_dtype )
3704
+ return filters + (serializer ,) + compressors
3712
3705
3713
3706
3714
3707
FiltersLike : TypeAlias = (
@@ -3785,9 +3778,8 @@ async def create_array(
3785
3778
For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
3786
3779
and these values must be instances of ``ArrayArrayCodec``, or dict representations
3787
3780
of ``ArrayArrayCodec``.
3788
- If ``filters`` and ``compressors`` are not specified, then the default codecs for
3789
- Zarr format 3 will be used.
3790
- These defaults can be changed by modifying the value of ``array.v3_default_codecs``
3781
+ If no ``filters`` are provided, a default set of filters will be used.
3782
+ These defaults can be changed by modifying the value of ``array.v3_default_filters``
3791
3783
in :mod:`zarr.core.config`.
3792
3784
Use ``None`` to omit default filters.
3793
3785
@@ -3803,22 +3795,22 @@ async def create_array(
3803
3795
3804
3796
For Zarr format 3, a "compressor" is a codec that takes a bytestream, and
3805
3797
returns another bytestream. Multiple compressors may be provided for Zarr format 3.
3806
- If ``filters`` and ``compressors`` are not specified, then the default codecs for
3807
- Zarr format 3 will be used.
3808
- These defaults can be changed by modifying the value of ``array.v3_default_codecs``
3798
+ If no ``compressors`` are provided, a default set of compressors will be used.
3799
+ These defaults can be changed by modifying the value of ``array.v3_default_compressors``
3809
3800
in :mod:`zarr.core.config`.
3810
3801
Use ``None`` to omit default compressors.
3811
3802
3812
3803
For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may
3813
3804
be provided for Zarr format 2.
3814
- If no ``compressors`` are provided, a default compressor will be used.
3815
- These defaults can be changed by modifying the value of ``array.v2_default_compressor``
3805
+ If no ``compressor`` is provided, a default compressor will be used. This default can be changed by modifying the value of ``array.v2_default_compressor``
3816
3806
in :mod:`zarr.core.config`.
3817
3807
Use ``None`` to omit the default compressor.
3818
3808
serializer : dict[str, JSON] | ArrayBytesCodec, optional
3819
3809
Array-to-bytes codec to use for encoding the array data.
3820
3810
Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion.
3821
- If no ``serializer`` is provided, the `zarr.codecs.BytesCodec` codec will be used.
3811
+ If no ``serializer`` is provided, a default serializer will be used.
3812
+ These defaults can be changed by modifying the value of ``array.v3_default_serializer``
3813
+ in :mod:`zarr.core.config`.
3822
3814
fill_value : Any, optional
3823
3815
Fill value for the array.
3824
3816
order : {"C", "F"}, optional
@@ -3997,7 +3989,6 @@ def _get_default_chunk_encoding_v3(
3997
3989
"""
3998
3990
Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype.
3999
3991
"""
4000
- default_codecs = zarr_config .get ("array.v3_default_codecs" )
4001
3992
dtype = DataType .from_numpy (np_dtype )
4002
3993
if dtype == DataType .string :
4003
3994
dtype_key = "string"
@@ -4006,31 +3997,15 @@ def _get_default_chunk_encoding_v3(
4006
3997
else :
4007
3998
dtype_key = "numeric"
4008
3999
4009
- codec_dicts = default_codecs [dtype_key ]
4010
- codecs = tuple (_resolve_codec (c ) for c in codec_dicts )
4011
- array_bytes_maybe = None
4012
- array_array : list [ArrayArrayCodec ] = []
4013
- bytes_bytes : list [BytesBytesCodec ] = []
4014
-
4015
- for codec in codecs :
4016
- if isinstance (codec , ArrayBytesCodec ):
4017
- if array_bytes_maybe is not None :
4018
- raise ValueError (
4019
- f"Got two instances of ArrayBytesCodec: { array_bytes_maybe } and { codec } . "
4020
- "Only one array-to-bytes codec is allowed."
4021
- )
4022
- array_bytes_maybe = codec
4023
- elif isinstance (codec , ArrayArrayCodec ):
4024
- array_array .append (codec )
4025
- elif isinstance (codec , BytesBytesCodec ):
4026
- bytes_bytes .append (codec )
4027
- else :
4028
- raise TypeError (f"Unexpected codec type: { type (codec )} " )
4000
+ default_filters = zarr_config .get ("array.v3_default_filters" ).get (dtype_key )
4001
+ default_serializer = zarr_config .get ("array.v3_default_serializer" ).get (dtype_key )
4002
+ default_compressors = zarr_config .get ("array.v3_default_compressors" ).get (dtype_key )
4029
4003
4030
- if array_bytes_maybe is None :
4031
- raise ValueError ("Required ArrayBytesCodec was not found." )
4004
+ filters = tuple (_parse_array_array_codec (codec_dict ) for codec_dict in default_filters )
4005
+ serializer = _parse_array_bytes_codec (default_serializer )
4006
+ compressors = tuple (_parse_bytes_bytes_codec (codec_dict ) for codec_dict in default_compressors )
4032
4007
4033
- return tuple ( array_array ), array_bytes_maybe , tuple ( bytes_bytes )
4008
+ return filters , serializer , compressors
4034
4009
4035
4010
4036
4011
def _get_default_chunk_encoding_v2 (
@@ -4111,34 +4086,15 @@ def _parse_chunk_encoding_v3(
4111
4086
default_array_array , default_array_bytes , default_bytes_bytes = _get_default_chunk_encoding_v3 (
4112
4087
dtype
4113
4088
)
4114
- maybe_bytes_bytes : Iterable [Codec | dict [str , JSON ]]
4115
- maybe_array_array : Iterable [Codec | dict [str , JSON ]]
4116
- out_bytes_bytes : tuple [BytesBytesCodec , ...]
4117
- if compressors is None :
4118
- out_bytes_bytes = ()
4119
-
4120
- elif compressors == "auto" :
4121
- out_bytes_bytes = default_bytes_bytes
4122
4089
4123
- else :
4124
- if isinstance (compressors , dict | Codec ):
4125
- maybe_bytes_bytes = (compressors ,)
4126
- elif compressors is None :
4127
- maybe_bytes_bytes = ()
4128
- else :
4129
- maybe_bytes_bytes = cast (Iterable [Codec | dict [str , JSON ]], compressors )
4130
-
4131
- out_bytes_bytes = tuple (_parse_bytes_bytes_codec (c ) for c in maybe_bytes_bytes )
4132
- out_array_array : tuple [ArrayArrayCodec , ...]
4133
4090
if filters is None :
4134
- out_array_array = ()
4091
+ out_array_array : tuple [ ArrayArrayCodec , ...] = ()
4135
4092
elif filters == "auto" :
4136
4093
out_array_array = default_array_array
4137
4094
else :
4095
+ maybe_array_array : Iterable [Codec | dict [str , JSON ]]
4138
4096
if isinstance (filters , dict | Codec ):
4139
4097
maybe_array_array = (filters ,)
4140
- elif filters is None :
4141
- maybe_array_array = ()
4142
4098
else :
4143
4099
maybe_array_array = cast (Iterable [Codec | dict [str , JSON ]], filters )
4144
4100
out_array_array = tuple (_parse_array_array_codec (c ) for c in maybe_array_array )
@@ -4148,6 +4104,19 @@ def _parse_chunk_encoding_v3(
4148
4104
else :
4149
4105
out_array_bytes = _parse_array_bytes_codec (serializer )
4150
4106
4107
+ if compressors is None :
4108
+ out_bytes_bytes : tuple [BytesBytesCodec , ...] = ()
4109
+ elif compressors == "auto" :
4110
+ out_bytes_bytes = default_bytes_bytes
4111
+ else :
4112
+ maybe_bytes_bytes : Iterable [Codec | dict [str , JSON ]]
4113
+ if isinstance (compressors , dict | Codec ):
4114
+ maybe_bytes_bytes = (compressors ,)
4115
+ else :
4116
+ maybe_bytes_bytes = cast (Iterable [Codec | dict [str , JSON ]], compressors )
4117
+
4118
+ out_bytes_bytes = tuple (_parse_bytes_bytes_codec (c ) for c in maybe_bytes_bytes )
4119
+
4151
4120
return out_array_array , out_array_bytes , out_bytes_bytes
4152
4121
4153
4122
0 commit comments