Skip to content

Commit 8a63c4e

Browse files
committed
Tests and edge-cases clean-up
1 parent 137ad5a commit 8a63c4e

File tree

5 files changed

+150
-85
lines changed

5 files changed

+150
-85
lines changed

Zarr.m

Lines changed: 46 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,43 @@ function makeZarrGroups(existingParentPath, newGroupsPath)
171171
end
172172

173173
end
174+
175+
function [bucketName, objectPath] = extractS3BucketNameAndPath(url)
176+
% Helper function that splits an S3 URL or URI into its bucket name and object path.
177+
% bucketName and objectPath are needed to fill the KVstore hash
178+
% map for tensorstore. Errors with MATLAB:Zarr:invalidS3URL if url matches no pattern.
179+
% Regular expression patterns for the supported S3 URL and URI forms.
180+
% They are tried in order, so the AWS-specific forms come before the generic custom-endpoint forms.
181+
patterns = { ...
182+
'^https://([^.]+)\.s3\.([^.]+)\.amazonaws\.com/(.+)$', ... % 1: AWS virtual-hosted, region (https://mybucket.s3.us-west-2.amazonaws.com/path/to/myZarrFile)
183+
'^https://([^.]+)\.s3\.amazonaws\.com/(.+)$', ... % 2: AWS virtual-hosted, no region (https://mybucket.s3.amazonaws.com/path/to/myZarrFile)
184+
'^https://([^.]+)\.s3\.[^/]+/(.+)$', ... % 3: Custom endpoint virtual-hosted (https://mybucket.s3.custom-endpoint.org/path/to/myZarrFile)
185+
'^https://s3\.amazonaws\.com/([^/]+)/(.+)$', ... % 4: AWS path-style (https://s3.amazonaws.com/mybucket/path/to/myZarrFile)
186+
'^https://s3\.[^/]+/([^/]+)/(.+)$', ... % 5: Custom endpoint path-style (https://s3.eu-central-1.example.edu/mybucket/path/to/myZarrFile)
187+
'^s3://([^/]+)/(.+)$' ... % 6: S3 URI (s3://mybucket/path/to/myZarrFile)
188+
};
189+
190+
% Token positions of the bucket name and the object path for each pattern above.
191+
% regexp returns one token per capture group of the matching pattern.
192+
% Pattern 1 also captures the region as its second token, so its path
193+
% is token 3; every other pattern captures only the bucket and the path.
194+
bucketIdx = [1, 1, 1, 1, 1, 1];
195+
pathIdx = [3, 2, 2, 2, 2, 2];
196+
197+
% Try each pattern in turn and return the bucket name and path taken from
198+
% the first pattern that matches the URL; later patterns are not evaluated.
199+
for patternIdx = 1:numel(patterns)
200+
tokens = regexp(url, patterns{patternIdx}, 'tokens');
201+
if ~isempty(tokens)
202+
t = tokens{1};
203+
bucketName = t{bucketIdx(patternIdx)};
204+
objectPath = t{pathIdx(patternIdx)};
205+
return;
206+
end
207+
end
208+
209+
error("MATLAB:Zarr:invalidS3URL","Invalid S3 URI format.");
210+
end
174211
end
175212

176213
methods
@@ -183,7 +220,7 @@ function makeZarrGroups(existingParentPath, newGroupsPath)
183220
obj.isRemote = matlab.io.internal.vfs.validators.hasIriPrefix(obj.Path);
184221
if obj.isRemote % Remote file (only S3 support at the moment)
185222
% Extract the S3 bucket name and path
186-
[bucketName, objectPath] = obj.extractS3BucketNameAndPath(obj.Path);
223+
[bucketName, objectPath] = Zarr.extractS3BucketNameAndPath(obj.Path);
187224
% Create a Python dictionary for the KV store driver
188225
obj.KVStoreSchema = py.ZarrPy.createKVStore(obj.isRemote, objectPath, bucketName);
189226

@@ -241,7 +278,13 @@ function create(obj, dtype, data_size, chunk_size, fillvalue, compression)
241278
if isempty(fillvalue)
242279
obj.FillValue = py.None;
243280
else
244-
obj.FillValue = cast(fillvalue, obj.Datatype.MATLABType);
281+
% Fill value must be of the same datatype as data.
282+
if ~isa(fillvalue, dtype)
283+
error("MATLAB:zarrcreate:invalidFillValueType",...
284+
"FillValue must be of the same datatype as data (""%s"").",...
285+
dtype)
286+
end
287+
obj.FillValue = fillvalue;
245288
end
246289

247290
% see how much of the provided path exists already
@@ -334,42 +377,7 @@ function write(obj, data)
334377
end
335378
end
336379

337-
function [bucketName, objectPath] = extractS3BucketNameAndPath(~,url)
338-
% Helper function to extract S3 bucket name and path to file
339-
% bucketName and objectPath are needed to fill the KVstore hash
340-
% map for tensorstore.
341-
% Define the regular expression patterns for matching S3 URLs and URIs
342-
% S3 URLs can have the following patterns.
343-
patterns = { ...
344-
'^https://([^.]+)\.s3\.([^.]+)\.amazonaws\.com/(.+)$', ... % 1: AWS virtual-hosted, region (https://mybucket.s3.us-west-2.amazonaws.com/path/to/myZarrFile)
345-
'^https://([^.]+)\.s3\.amazonaws\.com/(.+)$', ... % 2: AWS virtual-hosted, no region (https://mybucket.s3.amazonaws.com/path/to/myZarrFile)
346-
'^https://([^.]+)\.s3\.[^/]+/(.+)$', ... % 3: Custom endpoint virtual-hosted (https://mybucket.s3.custom-endpoint.org/path/to/myZarrFile)
347-
'^https://s3\.amazonaws\.com/([^/]+)/(.+)$', ... % 4: AWS path-style (https://s3.amazonaws.com/mybucket/path/to/myZarrFile)
348-
'^https://s3\.[^/]+/([^/]+)/(.+)$', ... % 5: Custom endpoint path-style (https://s3.eu-central-1.example.edu/mybucket/path/to/myZarrFile)
349-
'^s3://([^/]+)/(.+)$' ... % 6: S3 URI (s3://mybucket/path/to/myZarrFile)
350-
};
351-
352-
% For each pattern, specify which group is bucket and which is path
353-
% regexp will extract multiple tokens from the patterns above.
354-
% For each pattern, the indices below denote the location of
355-
% the bucket and the path name.
356-
bucketIdx = [1, 1, 1, 1, 1, 1];
357-
pathIdx = [3, 2, 2, 2, 2, 2];
358-
359-
% Iterate through the patterns and identify the pattern which matches the
360-
% URI. Extract the bucket name and the path.
361-
for patternIdx = 1:numel(patterns)
362-
tokens = regexp(url, patterns{patternIdx}, 'tokens');
363-
if ~isempty(tokens)
364-
t = tokens{1};
365-
bucketName = t{bucketIdx(patternIdx)};
366-
objectPath = t{pathIdx(patternIdx)};
367-
return;
368-
end
369-
end
370-
371-
error("MATLAB:Zarr:invalidS3URL","Invalid S3 URI format.");
372-
end
380+
373381
end
374382

375383
end

test/tZarrAttributes.m

Lines changed: 43 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,49 +7,62 @@
77
function createZarrArrayWithAttrs(testcase)
88
% Create Zarr array and add some attributes.
99
zarrcreate(testcase.ArrPathWrite,testcase.ArrSize);
10-
zarrwriteatt(testcase.ArrPathWrite,'attr1','This is an array attribute.');
11-
zarrwriteatt(testcase.ArrPathWrite,'attr2',{1,2,3});
12-
attr3.numVal = 10;
13-
attr3.strArr = ["array","attribute"];
14-
zarrwriteatt(testcase.ArrPathWrite,'attr3',attr3);
10+
zarrwriteatt(testcase.ArrPathWrite,'scalarText','This is an array attribute.');
11+
zarrwriteatt(testcase.ArrPathWrite,'numericVector',[1,2,3]);
12+
zarrwriteatt(testcase.ArrPathWrite,'numericCellArray',{1,2,3});
13+
zarrwriteatt(testcase.ArrPathWrite,'mixedCellArray',{1,'two',3});
14+
attrStruct.numVal = 10;
15+
attrStruct.strArr = ["array","attribute"];
16+
zarrwriteatt(testcase.ArrPathWrite,'struct',attrStruct);
1517
end
1618
end
1719

1820
methods(Test)
1921
function verifyArrayAttributeInfo(testcase)
20-
% Write attribute info using zarrwriteatt function to an array.
21-
22-
arrInfo = zarrinfo(testcase.ArrPathWrite);
23-
actAttr.attr1 = arrInfo.attr1;
24-
25-
% TODO: Enable code once Issue-34 is fixed.
26-
%actAttr.attr2 = arrInfo.attr2;
27-
%actAttr.attr3 = arrInfo.attr3;
28-
29-
expAttr.attr1 = 'This is an array attribute.';
30-
%expAttr.attr2 = {1,2,3};
31-
%expAttr.attr3.numVal = 10;
32-
%expAttr.attr4.strArr = ["array","attribute"];
33-
34-
testcase.verifyEqual(actAttr,expAttr,'Failed to verify attribute info.');
22+
% Write attribute info using zarrwriteatt function to an array
23+
% (during test setup) and verify written values using zarrinfo
24+
25+
actInfo = zarrinfo(testcase.ArrPathWrite);
26+
27+
testcase.verifyEqual(actInfo.scalarText,...
28+
'This is an array attribute.',...
29+
'Failed to verify attribute info for scalar text.');
30+
testcase.verifyEqual(actInfo.numericVector,...
31+
[1;2;3],... % JSON stores all vectors as column vectors
32+
'Failed to verify attribute info for numeric vector.');
33+
testcase.verifyEqual(actInfo.numericCellArray,...
34+
[1;2;3],... % JSON stores numeric cell array as column vector
35+
'Failed to verify attribute info for numeric cell array.');
36+
testcase.verifyEqual(actInfo.mixedCellArray,...
37+
{1; 'two'; 3},...% JSON stores all vectors as column vectors
38+
'Failed to verify attribute info for mixed cell array.');
39+
40+
expStruct.numVal = 10;
41+
% JSON stores string arrays as column cell arrays of char
42+
% vectors
43+
expStruct.strArr = {'array';'attribute'};
44+
testcase.verifyEqual(actInfo.struct,...
45+
expStruct,...
46+
'Failed to verify attribute info for struct.');
3547
end
3648

3749
function verifyAttrOverwrite(testcase)
3850
% Verify attribute value after overwrite.
39-
%testcase.assumeTrue(false,'Filtered until the attributes display is fixed.');
40-
expAttrStr = ["new","attribute","value"];
41-
zarrwriteatt(testcase.ArrPathWrite,'attr1',expAttrStr);
51+
52+
expAttrStr = 'New attribute value';
53+
zarrwriteatt(testcase.ArrPathWrite,'scalarText',expAttrStr);
4254
expAttrDbl = 10;
43-
zarrwriteatt(testcase.ArrPathWrite,'attr2',expAttrDbl);
55+
zarrwriteatt(testcase.ArrPathWrite,'numericVector',expAttrDbl);
4456

4557
arrInfo = zarrinfo(testcase.ArrPathWrite);
46-
47-
% TODO: Enable code once Issue-34 is fixed.
48-
%actAttrStr = arrInfo.attr1;
49-
actAttrDbl = arrInfo.attr2;
5058

51-
%testcase.verifyEqual(actAttrStr,expAttrStr,'Failed to verify string attribute info');
52-
testcase.verifyEqual(actAttrDbl,expAttrDbl,'Failed to verify double attribute info');
59+
actAttrStr = arrInfo.scalarText;
60+
actAttrDbl = arrInfo.numericVector;
61+
62+
testcase.verifyEqual(actAttrStr,expAttrStr,...
63+
'Failed to verify string attribute info');
64+
testcase.verifyEqual(actAttrDbl,expAttrDbl,...
65+
'Failed to verify double attribute info');
5366
end
5467

5568
function verifyGroupAttributeInfo(testcase)

test/tZarrCreate.m

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,30 @@
44
% Copyright 2025 The MathWorks, Inc.
55

66
methods(Test)
7+
8+
function createDefaultArray(testcase)
9+
% Verify that zarrcreate, called with only a path and a size, creates a
10+
% Zarr array whose zarrinfo metadata matches the expected default properties
11+
12+
zarrcreate(testcase.ArrPathWrite,testcase.ArrSize);
13+
14+
expInfo.chunks = testcase.ArrSize'; % no ChunkSize given, so one chunk spans the array; column form presumably mirrors how zarrinfo returns vectors (TODO confirm)
15+
expInfo.compressor = [];
16+
expInfo.dimension_separator = '.';
17+
expInfo.dtype = '<f8'; % presumably the Zarr dtype string for the default 'double' datatype (TODO confirm)
18+
expInfo.fill_value = [];
19+
expInfo.filters = [];
20+
expInfo.order = 'C';
21+
expInfo.shape = testcase.ArrSize';
22+
expInfo.zarr_format = 2;
23+
expInfo.node_type = 'array';
24+
25+
actInfo = zarrinfo(testcase.ArrPathWrite);
26+
testcase.verifyEqual(actInfo, expInfo,...
27+
'Failed to verify creating Zarr array with default properties');
28+
29+
end
30+
731
function createIntermediateZgroups(testcase)
832
% Verify that zarrcreate creates zarr groups when given a
933
% nested path
@@ -36,7 +60,7 @@ function createArrayRelativePath(testcase)
3660
inpPath = fullfile('..','myGrp','myArr');
3761
zarrcreate(inpPath,[10 10]);
3862
arrInfo = zarrinfo(inpPath);
39-
testcase.verifyEqual(arrInfo.zarr_format,2,'Failed to Zarr array format');
63+
testcase.verifyEqual(arrInfo.zarr_format,2,'Failed to verify Zarr array format');
4064
testcase.verifyEqual(arrInfo.node_type,'array','Unexpected Zarr array node type');
4165
end
4266

@@ -172,29 +196,50 @@ function invalidShuffleBlosc(testcase)
172196

173197
function invalidChunkSize(testcase)
174198
% Verify error when an invalid type for the chunk size is used.
175-
testcase.assumeTrue(false,'Filtered until issue 25 is fixed.');
199+
176200
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
177-
'ChunkSize',5),testcase.PyException);
201+
'ChunkSize',5),'MATLAB:zarrcreate:chunkDimsMismatch');
178202
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
179-
'ChunkSize',[]),testcase.PyException);
203+
'ChunkSize',[]),'MATLAB:zarrcreate:chunkDimsMismatch');
180204
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
181-
'ChunkSize',[0 0]),testcase.PyException);
205+
'ChunkSize',[0 0]),'MATLAB:validators:mustBePositive');
182206
end
183207

184208
function invalidFillValue(testcase)
185209
% Verify error when an invalid type for the fill value is used.
210+
186211
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
187212
"FillValue",[-9 -9]),testcase.PyException);
188-
% testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
189-
% "FillValue",NaN),testcase.PyException);
190-
% testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
191-
% "FillValue",inf),testcase.PyException);
213+
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize, ...
214+
"FillValue","none"),'MATLAB:validators:mustBeNumeric');
215+
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,testcase.ArrSize,...
216+
Datatype="int8", FillValue=1.4), 'MATLAB:zarrcreate:invalidFillValueType')
217+
end
218+
219+
function specialFillValue(testcase)
220+
% Verify creating Zarr arrays using special fill values like
221+
% NaN and Inf, by reading back an array that was never written to
222+
223+
expData = [NaN,NaN];
224+
zarrcreate(testcase.ArrPathWrite, [1,2], FillValue=NaN);
225+
actData = zarrread(testcase.ArrPathWrite);
226+
testcase.verifyEqual(actData, expData, 'Failed to verify NaN fill value'); % verifyEqual takes (actual, expected)
227+
228+
expData = [Inf,Inf];
229+
zarrcreate(testcase.ArrPathWrite, [1,2], FillValue=Inf);
230+
actData = zarrread(testcase.ArrPathWrite);
231+
testcase.verifyEqual(actData, expData, 'Failed to verify Inf fill value'); % verifyEqual takes (actual, expected)
192232
end
193233

194234
function invalidSizeInput(testcase)
195235
% Verify error when an invalid size input is used.
196-
% testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,[]), ...
197-
% testcase.PyException);
236+
237+
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,[]), ...
238+
'MATLAB:validators:mustBeNonempty');
239+
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,Inf), ...
240+
'MATLAB:validators:mustBeFinite');
241+
testcase.verifyError(@()zarrcreate(testcase.ArrPathWrite,-2), ...
242+
'MATLAB:validators:mustBePositive');
198243
end
199244

200245
function invalidDatatype(testcase)

test/tZarrWrite.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ function createArrayLocalUserDefinedSyntax(testcase,DataType,CompId)
3939
% Verify the data when creating and writing to arrays with
4040
% user-defined properties using zarrcreate and zarrwrite locally.
4141
comp.level = 5;
42-
fillValue = -9;
42+
fillValue = cast(-9, DataType);
4343
expData = cast(ones(testcase.ArrSize),DataType);
4444
comp.id = CompId;
4545
zarrcreate(testcase.ArrPathWrite,testcase.ArrSize,'ChunkSize',testcase.ChunkSize, ...

zarrcreate.m

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,15 +81,13 @@ function zarrcreate(filepath, datasize, options)
8181

8282
arguments
8383
filepath {mustBeTextScalar, mustBeNonempty}
84-
datasize (1,:) double {mustBeFinite, mustBeNonnegative}
85-
options.ChunkSize (1,:) double {mustBeFinite, mustBeNonnegative} = datasize
84+
datasize (1,:) double {mustBeFinite, mustBePositive, mustBeNonempty}
85+
options.ChunkSize (1,:) double {mustBeFinite, mustBePositive} = datasize
8686
options.Datatype {mustBeTextScalar, mustBeNonempty} = 'double'
87-
options.FillValue {mustBeNumeric} = []
87+
options.FillValue {mustBeNumericOrLogical} = []
8888
options.Compression {mustBeStructOrEmpty} = []
8989
end
9090

91-
zarrObj = Zarr(filepath);
92-
9391
% Dimensionality of the dataset and the chunk size must be the same
9492
if any(size(datasize) ~= size(options.ChunkSize))
9593
error("MATLAB:zarrcreate:chunkDimsMismatch",...
@@ -105,6 +103,7 @@ function zarrcreate(filepath, datasize, options)
105103
options.ChunkSize = [1 options.ChunkSize];
106104
end
107105

106+
zarrObj = Zarr(filepath);
108107
zarrObj.create(options.Datatype, datasize, options.ChunkSize, options.FillValue, options.Compression)
109108

110109
end

0 commit comments

Comments
 (0)