@@ -4,12 +4,11 @@
import math
import operator
import re
-from collections.abc import MutableMapping
from functools import reduce
from typing import Any

import numpy as np
-from numcodecs.compat import ensure_bytes, ensure_ndarray
+from numcodecs.compat import ensure_bytes

from zarr._storage.store import _prefix_to_attrs_key, assert_zarr_v3_api_available
from zarr.attrs import Attributes
@@ -35,6 +34,7 @@
from zarr.storage import (
    _get_hierarchy_metadata,
    _prefix_to_array_key,
+    KVStore,
    getsize,
    listdir,
    normalize_store_arg,
@@ -51,6 +51,7 @@
    normalize_shape,
    normalize_storage_path,
    PartialReadBuffer,
+    ensure_ndarray_like
)

@@ -98,6 +99,12 @@ class Array:

        .. versionadded:: 2.11

+    meta_array : array-like, optional
+        An array instance to use for determining arrays to create and return
+        to users. Use `numpy.empty(())` by default.
+
+        .. versionadded:: 2.13
+
    Attributes
    ----------
@@ -129,6 +136,7 @@ class Array:
    vindex
    oindex
    write_empty_chunks
+    meta_array

    Methods
    -------
@@ -163,6 +171,7 @@ def __init__(
        partial_decompress=False,
        write_empty_chunks=True,
        zarr_version=None,
+        meta_array=None,
    ):
        # N.B., expect at this point store is fully initialized with all
        # configuration metadata fully specified and normalized
@@ -191,8 +200,11 @@ def __init__(
        self._is_view = False
        self._partial_decompress = partial_decompress
        self._write_empty_chunks = write_empty_chunks
+        if meta_array is not None:
+            self._meta_array = np.empty_like(meta_array, shape=())
+        else:
+            self._meta_array = np.empty(())
        self._version = zarr_version
-
        if self._version == 3:
            self._data_key_prefix = 'data/root/' + self._key_prefix
            self._data_path = 'data/root/' + self._path
@@ -555,6 +567,13 @@ def write_empty_chunks(self) -> bool:
        """
        return self._write_empty_chunks

+    @property
+    def meta_array(self):
+        """An array-like instance to use for determining arrays to create and return
+        to users.
+        """
+        return self._meta_array
+
    def __eq__(self, other):
        return (
            isinstance(other, Array) and
@@ -929,7 +948,7 @@ def _get_basic_selection_zd(self, selection, out=None, fields=None):

        except KeyError:
            # chunk not initialized
-            chunk = np.zeros((), dtype=self._dtype)
+            chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype)
            if self._fill_value is not None:
                chunk.fill(self._fill_value)

@@ -1233,7 +1252,8 @@ def _get_selection(self, indexer, out=None, fields=None):

        # setup output array
        if out is None:
-            out = np.empty(out_shape, dtype=out_dtype, order=self._order)
+            out = np.empty_like(self._meta_array, shape=out_shape,
+                                dtype=out_dtype, order=self._order)
        else:
            check_array_shape('out', out, out_shape)

@@ -1607,9 +1627,13 @@ def set_coordinate_selection(self, selection, value, fields=None):
        # setup indexer
        indexer = CoordinateIndexer(selection, self)

-        # handle value - need to flatten
+        # handle value - need ndarray-like flattened value
        if not is_scalar(value, self._dtype):
-            value = np.asanyarray(value)
+            try:
+                value = ensure_ndarray_like(value)
+            except TypeError:
+                # Handle types like `list` or `tuple`
+                value = np.array(value, like=self._meta_array)
        if hasattr(value, 'shape') and len(value.shape) > 1:
            value = value.reshape(-1)

@@ -1712,7 +1736,7 @@ def _set_basic_selection_zd(self, selection, value, fields=None):

        except KeyError:
            # chunk not initialized
-            chunk = np.zeros((), dtype=self._dtype)
+            chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype)
            if self._fill_value is not None:
                chunk.fill(self._fill_value)

@@ -1772,7 +1796,7 @@ def _set_selection(self, indexer, value, fields=None):
            pass
        else:
            if not hasattr(value, 'shape'):
-                value = np.asanyarray(value)
+                value = np.asanyarray(value, like=self._meta_array)
            check_array_shape('value', value, sel_shape)

        # iterate over chunks in range
@@ -1840,8 +1864,11 @@ def _process_chunk(
                self._dtype != object):

            dest = out[out_selection]
+            # Assume that array-like objects that do not have a
+            # `writeable` flag are writeable.
+            dest_is_writable = getattr(dest, "writeable", True)
            write_direct = (
-                dest.flags.writeable and
+                dest_is_writable and
                (
                    (self._order == 'C' and dest.flags.c_contiguous) or
                    (self._order == 'F' and dest.flags.f_contiguous)
@@ -1858,7 +1885,7 @@ def _process_chunk(
                        cdata = cdata.read_full()
                    self._compressor.decode(cdata, dest)
                else:
-                    chunk = ensure_ndarray(cdata).view(self._dtype)
+                    chunk = ensure_ndarray_like(cdata).view(self._dtype)
                    chunk = chunk.reshape(self._chunks, order=self._order)
                    np.copyto(dest, chunk)
                return
@@ -1868,7 +1895,7 @@ def _process_chunk(
        if partial_read_decode:
            cdata.prepare_chunk()
            # size of chunk
-            tmp = np.empty(self._chunks, dtype=self.dtype)
+            tmp = np.empty_like(self._meta_array, shape=self._chunks, dtype=self.dtype)
            index_selection = PartialChunkIterator(chunk_selection, self.chunks)
            for start, nitems, partial_out_selection in index_selection:
                expected_shape = [
@@ -1925,7 +1952,7 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,
        """
        out_is_ndarray = True
        try:
-            out = ensure_ndarray(out)
+            out = ensure_ndarray_like(out)
        except TypeError:
            out_is_ndarray = False

@@ -1960,7 +1987,7 @@ def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection,
        """
        out_is_ndarray = True
        try:
-            out = ensure_ndarray(out)
+            out = ensure_ndarray_like(out)
        except TypeError:  # pragma: no cover
            out_is_ndarray = False

@@ -2082,7 +2109,9 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
        if is_scalar(value, self._dtype):

            # setup array filled with value
-            chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
+            chunk = np.empty_like(
+                self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order
+            )
            chunk.fill(value)

        else:
@@ -2102,14 +2131,18 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):

                # chunk not initialized
                if self._fill_value is not None:
-                    chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
+                    chunk = np.empty_like(
+                        self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order
+                    )
                    chunk.fill(self._fill_value)
                elif self._dtype == object:
                    chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
                else:
                    # N.B., use zeros here so any region beyond the array has consistent
                    # and compressible data
-                    chunk = np.zeros(self._chunks, dtype=self._dtype, order=self._order)
+                    chunk = np.zeros_like(
+                        self._meta_array, shape=self._chunks, dtype=self._dtype, order=self._order
+                    )


            else:
@@ -2159,7 +2192,7 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None):
                chunk = f.decode(chunk)

        # view as numpy array with correct dtype
-        chunk = ensure_ndarray(chunk)
+        chunk = ensure_ndarray_like(chunk)
        # special case object dtype, because incorrect handling can lead to
        # segfaults and other bad things happening
        if self._dtype != object:
@@ -2186,7 +2219,7 @@ def _encode_chunk(self, chunk):
                chunk = f.encode(chunk)

        # check object encoding
-        if ensure_ndarray(chunk).dtype == object:
+        if ensure_ndarray_like(chunk).dtype == object:
            raise RuntimeError('cannot write object array without object codec')

        # compress
@@ -2196,7 +2229,7 @@ def _encode_chunk(self, chunk):
            cdata = chunk

        # ensure in-memory data is immutable and easy to compare
-        if isinstance(self.chunk_store, MutableMapping):
+        if isinstance(self.chunk_store, KVStore):
            cdata = ensure_bytes(cdata)

        return cdata
@@ -2354,12 +2387,22 @@ def hexdigest(self, hashname="sha1"):
        return checksum

    def __getstate__(self):
-        return (self._store, self._path, self._read_only, self._chunk_store,
-                self._synchronizer, self._cache_metadata, self._attrs.cache,
-                self._partial_decompress, self._write_empty_chunks, self._version)
+        return {
+            "store": self._store,
+            "path": self._path,
+            "read_only": self._read_only,
+            "chunk_store": self._chunk_store,
+            "synchronizer": self._synchronizer,
+            "cache_metadata": self._cache_metadata,
+            "cache_attrs": self._attrs.cache,
+            "partial_decompress": self._partial_decompress,
+            "write_empty_chunks": self._write_empty_chunks,
+            "zarr_version": self._version,
+            "meta_array": self._meta_array,
+        }

    def __setstate__(self, state):
-        self.__init__(*state)
+        self.__init__(**state)

    def _synchronized_op(self, f, *args, **kwargs):

@@ -2466,7 +2509,7 @@ def append(self, data, axis=0):

        Parameters
        ----------
-        data : array_like
+        data : array-like
            Data to be appended.
        axis : int
            Axis along which to append.
@@ -2502,7 +2545,7 @@ def _append_nosync(self, data, axis=0):

        # ensure data is array-like
        if not hasattr(data, 'shape'):
-            data = np.asanyarray(data)
+            data = np.asanyarray(data, like=self._meta_array)

        # ensure shapes are compatible for non-append dimensions
        self_shape_preserved = tuple(s for i, s in enumerate(self._shape)
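
Usage note. The sketch below is illustrative only: it assumes a zarr build that already contains these changes (2.13 or later) and shows only behaviour visible in this diff. With the default `meta_array` (np.empty(())), selections still come back as plain NumPy arrays; passing a device array such as cupy.empty(()) instead would make `_get_selection` allocate its output via np.empty_like(self._meta_array, ...), provided the store and compressor can handle such buffers.

# Illustrative sketch only - NumPy-only, no GPU required.
# Assumes zarr >= 2.13 (i.e. a build that includes this diff).
import pickle

import numpy as np
import zarr

z = zarr.zeros((4, 4), chunks=(2, 2), dtype="f8")

# Without an explicit meta_array, Array falls back to np.empty(()),
# so selections are returned as plain NumPy arrays.
assert isinstance(z.meta_array, np.ndarray)
assert isinstance(z[:], np.ndarray)

# __getstate__ now returns a dict (including "meta_array") and
# __setstate__ re-initialises via keyword arguments, so the new
# attribute survives a pickle round-trip.
z2 = pickle.loads(pickle.dumps(z))
assert isinstance(z2.meta_array, np.ndarray)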