@@ -171,6 +171,43 @@ function makeZarrGroups(existingParentPath, newGroupsPath)
171
171
end
172
172
173
173
end
174
+
175
function [bucketName, objectPath] = extractS3BucketNameAndPath(url)
% extractS3BucketNameAndPath Split an S3 URL or URI into bucket and object path.
%   [bucketName, objectPath] = extractS3BucketNameAndPath(url) matches url
%   against the supported S3 address forms listed below and returns the
%   bucket name and the path of the object inside that bucket. Both
%   values are needed to fill the KVstore hash map for tensorstore.
%
%   Input:
%     url        - S3 address (https URL or s3:// URI).
%   Outputs:
%     bucketName - name of the S3 bucket.
%     objectPath - path of the object within the bucket (no leading slash).
%
%   Errors with identifier "MATLAB:Zarr:invalidS3URL" when url matches
%   none of the supported forms.

% Supported address forms. They are tried in order and the first match
% wins, so the more specific AWS forms are listed before the generic
% custom-endpoint ones.
% In every pattern the bucket is the FIRST captured token and the object
% path is the LAST one (pattern 1 additionally captures the region in
% between). Keep that convention when adding new patterns.
patterns = { ...
    '^https://([^.]+)\.s3\.([^.]+)\.amazonaws\.com/(.+)$', ... % 1: AWS virtual-hosted, region (https://mybucket.s3.us-west-2.amazonaws.com/path/to/myZarrFile)
    '^https://([^.]+)\.s3\.amazonaws\.com/(.+)$', ...          % 2: AWS virtual-hosted, no region (https://mybucket.s3.amazonaws.com/path/to/myZarrFile)
    '^https://([^.]+)\.s3\.[^/]+/(.+)$', ...                   % 3: Custom endpoint virtual-hosted (https://mybucket.s3.custom-endpoint.org/path/to/myZarrFile)
    '^https://s3\.amazonaws\.com/([^/]+)/(.+)$', ...           % 4: AWS path-style (https://s3.amazonaws.com/mybucket/path/to/myZarrFile)
    '^https://s3\.[^/]+/([^/]+)/(.+)$', ...                    % 5: Custom endpoint path-style (https://s3.eu-central-1.example.edu/mybucket/path/to/myZarrFile)
    '^s3://([^/]+)/(.+)$' ...                                  % 6: S3 URI (s3://mybucket/path/to/myZarrFile)
    };

for patternIdx = 1:numel(patterns)
    % 'once' returns the tokens of the (single, anchored) match directly,
    % or an empty result when the pattern does not match.
    t = regexp(url, patterns{patternIdx}, 'tokens', 'once');
    if ~isempty(t)
        bucketName = t{1};
        objectPath = t{end};
        return;
    end
end

error("MATLAB:Zarr:invalidS3URL", "Invalid S3 URI format.");
end
174
211
end
175
212
176
213
methods
@@ -183,7 +220,7 @@ function makeZarrGroups(existingParentPath, newGroupsPath)
183
220
obj.isRemote = matlab .io .internal .vfs .validators .hasIriPrefix(obj .Path );
184
221
if obj .isRemote % Remote file (only S3 support at the moment)
185
222
% Extract the S3 bucket name and path
186
- [bucketName , objectPath ] = obj .extractS3BucketNameAndPath(obj .Path );
223
+ [bucketName , objectPath ] = Zarr .extractS3BucketNameAndPath(obj .Path );
187
224
% Create a Python dictionary for the KV store driver
188
225
obj.KVStoreSchema = py .ZarrPy .createKVStore(obj .isRemote , objectPath , bucketName );
189
226
@@ -241,7 +278,13 @@ function create(obj, dtype, data_size, chunk_size, fillvalue, compression)
241
278
if isempty(fillvalue )
242
279
obj.FillValue = py .None ;
243
280
else
244
- obj.FillValue = cast(fillvalue , obj .Datatype .MATLABType );
281
+ % Fill value must be of the same datatype as data.
282
+ if ~isa(fillvalue , dtype )
283
+ error(" MATLAB:zarrcreate:invalidFillValueType" ,...
284
+ " FillValue must be of the same datatype as data ("" %s"" )." ,...
285
+ dtype )
286
+ end
287
+ obj.FillValue = fillvalue ;
245
288
end
246
289
247
290
% see how much of the provided path exists already
@@ -334,42 +377,7 @@ function write(obj, data)
334
377
end
335
378
end
336
379
337
- function [bucketName , objectPath ] = extractS3BucketNameAndPath(~,url )
338
- % Helper function to extract the S3 bucket name and the object path from url (the first input is ignored).
339
- % bucketName and objectPath are needed to fill the KVstore hash
340
- % map for tensorstore. Raises an error if url matches none of the patterns below.
341
- % Define the regular expression patterns for matching S3 URLs and URIs.
342
- % S3 URLs can have the following patterns; they are tried in the listed order.
343
- patterns = { ...
344
- ' ^https://([^.]+)\.s3\.([^.]+)\.amazonaws\.com/(.+)$' , ... % 1: AWS virtual-hosted, region (https://mybucket.s3.us-west-2.amazonaws.com/path/to/myZarrFile)
345
- ' ^https://([^.]+)\.s3\.amazonaws\.com/(.+)$' , ... % 2: AWS virtual-hosted, no region (https://mybucket.s3.amazonaws.com/path/to/myZarrFile)
346
- ' ^https://([^.]+)\.s3\.[^/]+/(.+)$' , ... % 3: Custom endpoint virtual-hosted (https://mybucket.s3.custom-endpoint.org/path/to/myZarrFile)
347
- ' ^https://s3\.amazonaws\.com/([^/]+)/(.+)$' , ... % 4: AWS path-style (https://s3.amazonaws.com/mybucket/path/to/myZarrFile)
348
- ' ^https://s3\.[^/]+/([^/]+)/(.+)$' , ... % 5: Custom endpoint path-style (https://s3.eu-central-1.example.edu/mybucket/path/to/myZarrFile)
349
- ' ^s3://([^/]+)/(.+)$' ... % 6: S3 URI (s3://mybucket/path/to/myZarrFile)
350
- };
351
-
352
- % For each pattern, specify which token is the bucket and which is the path.
353
- % regexp extracts multiple tokens from the patterns above.
354
- % For each pattern, the indices below denote the position of
355
- % the bucket and the path among those tokens (pattern 1 has a third capture group, so its path index is 3).
356
- bucketIdx = [1 , 1 , 1 , 1 , 1 , 1 ];
357
- pathIdx = [3 , 2 , 2 , 2 , 2 , 2 ];
358
-
359
- % Iterate through the patterns and return on the first one that matches the
360
- % URI, extracting the bucket name and the path from its tokens.
361
- for patternIdx = 1 : numel(patterns )
362
- tokens = regexp(url , patterns{patternIdx }, ' tokens' );
363
- if ~isempty(tokens )
364
- t = tokens{1 };
365
- bucketName = t{bucketIdx(patternIdx )};
366
- objectPath = t{pathIdx(patternIdx )};
367
- return ;
368
- end
369
- end
370
-
371
- error(" MATLAB:Zarr:invalidS3URL" ," Invalid S3 URI format." );
372
- end
380
+
373
381
end
374
382
375
383
end
0 commit comments