Skip to content

Commit cd6e635

Browse files
authored
GH-38354: [MATLAB] Implement fromMATLAB method for arrow.array.ListArray (#38561)
### Rationale for this change We should implement a static `fromMATLAB` method for `arrow.array.ListArray` that takes in a MATLAB `cell` array and returns an instance of `arrow.array.ListArray`. Adding this method enables users to create an `arrow.array.ListArray` by passing a MATLAB `cell` array to the `arrow.array` gateway function: ```matlab >> C = {[1 2 3], [4 5], 6}; >> array = arrow.array(C) array = ListArray with 3 elements and 0 null values: [ [ 1, 2, 3 ], [ 4, 5 ], [ 6 ] ] ``` Internally, the `arrow.array` gateway function will call `arrow.array.ListArray.fromMATLAB` to construct a `ListArray` from the given `cell` array. ### What changes are included in this PR? 1. Implemented `fromMATLAB` method on `arrow.array.ListArray`. This method accepts a MATLAB `cell` array and returns an instance of `arrow.array.ListArray`. 2. Set the `ArrayStaticConstructor` property of `arrow.type.traits.ListTraits` to `@ arrow.array.ListArray.fromMATLAB`. 3. Added a switch case for `"cell"` to the `arrow.array` gateway function that invokes `arrow.array.ListArray.fromMATLAB` with the input `cell` array. ### Are these changes tested? Yes. I added a new test class to the `test/arrow/array/list` folder named `tFromMATLAB.m`. ### Are there any user-facing changes? Yes. Users can now create instances of `arrow.array.ListArray` by passing `cell` arrays to `arrow.array`: ```matlab >> C = {["A" "B"], ["C" "D" "E"], missing, ["F" "G"], string.empty(0, 1)}; >> array = arrow.array(C) array = ListArray with 5 elements and 1 null value: [ [ "A", "B" ], [ "C", "D", "E" ], null, [ "F", "G" ], [] ] ``` * Closes: #38354 Authored-by: Sarah Gilmore <sgilmore@mathworks.com> Signed-off-by: Kevin Gurney <kgurney@mathworks.com>
1 parent 1749e00 commit cd6e635

File tree

7 files changed

+291
-4
lines changed

7 files changed

+291
-4
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
% Licensed to the Apache Software Foundation (ASF) under one or more
2+
% contributor license agreements. See the NOTICE file distributed with
3+
% this work for additional information regarding copyright ownership.
4+
% The ASF licenses this file to you under the Apache License, Version
5+
% 2.0 (the "License"); you may not use this file except in compliance
6+
% with the License. You may obtain a copy of the License at
7+
%
8+
% http://www.apache.org/licenses/LICENSE-2.0
9+
%
10+
% Unless required by applicable law or agreed to in writing, software
11+
% distributed under the License is distributed on an "AS IS" BASIS,
12+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13+
% implied. See the License for the specific language governing
14+
% permissions and limitations under the License.
15+
16+
function idx = findFirstNonMissingElement(C)
%FINDFIRSTNONMISSINGELEMENT Locate the first cell whose content is not
% of class "missing".
%
%   IDX = FINDFIRSTNONMISSINGELEMENT(C) scans the cell array C in linear
%   index order and returns the index of the first cell whose content is
%   not an instance of the "missing" class. IDX is -1 when C is empty or
%   when every cell holds a "missing" value.

    numCells = numel(C);

    % Step past the leading run of missing-valued cells. The bounds check
    % comes first so C{idx} is never evaluated out of range.
    idx = 1;
    while idx <= numCells && isa(C{idx}, "missing")
        idx = idx + 1;
    end

    % Running off the end means no non-missing cell exists.
    if idx > numCells
        idx = -1;
    end
end

matlab/src/matlab/+arrow/+array/ListArray.m

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,60 @@
109109
array = arrow.array.ListArray(proxy);
110110
end
111111

112+
function array = fromMATLAB(C)
    % Construct an arrow.array.ListArray from a nonempty MATLAB cell
    % array.
    %
    % C is reshaped to a column by the arguments block. Cells that hold
    % a "missing" value become null list elements; every other cell is
    % checked by a validator created from the first non-missing cell and
    % contributes its values to the ListArray's values array.
    %
    % Throws "arrow:array:list:CellArrayAllMissing" when C contains no
    % non-missing cell.
    arguments
        C(:, 1) cell {mustBeNonempty}
    end
    import arrow.array.internal.list.findFirstNonMissingElement
    import arrow.array.internal.list.createValidator

    firstIdx = findFirstNonMissingElement(C);
    if firstIdx == -1
        error("arrow:array:list:CellArrayAllMissing", ...
            "The input cell array must contain at least one non-missing" + ...
            " value to be converted to an Arrow array.");
    end

    % The first non-missing cell is the reference the validator is
    % built from.
    validator = createValidator(C{firstIdx});

    numLists = numel(C);

    % Every cell ahead of the first non-missing one is null; the rest
    % start out valid and are cleared below when found to be missing.
    valid = [false(firstIdx - 1, 1); true(numLists - firstIdx + 1, 1)];

    % offsets(k + 1) is the running element count after list k. Entries
    % for the leading null cells keep their initial value of zero.
    offsets = zeros(numLists + 1, 1, "int32");

    for k = firstIdx:numLists
        item = C{k};
        if ~isa(item, "missing")
            validator.validateElement(item);
            offsets(k + 1) = offsets(k) + validator.getElementLength(item);
        else
            % A null list is zero-length: repeat the previous offset.
            valid(k) = false;
            offsets(k + 1) = offsets(k);
        end
    end

    offsetArray = arrow.array(offsets);

    % Only non-null cells contribute values. reshapeCellElements
    % presumably makes them vertically concatenable - confirm against
    % the validator classes.
    reshapedCells = validator.reshapeCellElements(C(valid));
    valueArray = arrow.array(vertcat(reshapedCells{:}));

    args = struct( ...
        "OffsetsProxyID", offsetArray.Proxy.ID, ...
        "ValuesProxyID", valueArray.Proxy.ID, ...
        "Valid", valid);

    proxy = arrow.internal.proxy.create("arrow.array.proxy.ListArray", args);
    array = arrow.array.ListArray(proxy);
end
112166
end
113167

114168
end

matlab/src/matlab/+arrow/+type/+traits/ListTraits.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
ArrayConstructor = @arrow.array.ListArray
2020
ArrayClassName = "arrow.array.ListArray"
2121
ArrayProxyClassName = "arrow.array.proxy.ListArray"
22-
ArrayStaticConstructor = missing
22+
ArrayStaticConstructor = @arrow.array.ListArray.fromMATLAB
2323
TypeConstructor = @arrow.type.ListType
2424
TypeClassName = "arrow.type.ListType"
2525
TypeProxyClassName = "arrow.type.proxy.ListType"

matlab/src/matlab/+arrow/array.m

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
arrowArray = arrow.array.Time64Array.fromMATLAB(data, varargin{:});
5050
case "table"
5151
arrowArray = arrow.array.StructArray.fromMATLAB(data, varargin{:});
52+
case "cell"
53+
arrowArray = arrow.array.ListArray.fromMATLAB(data, varargin{:});
5254
otherwise
5355
errid = "arrow:array:UnsupportedMATLABType";
5456
msg = join(["Unable to convert MATLAB type" classname "to arrow array."]);
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
%TFROMMATLAB Unit tests for arrow.array.ListArray's fromMATLAB method.
2+
3+
% Licensed to the Apache Software Foundation (ASF) under one or more
4+
% contributor license agreements. See the NOTICE file distributed with
5+
% this work for additional information regarding copyright ownership.
6+
% The ASF licenses this file to you under the Apache License, Version
7+
% 2.0 (the "License"); you may not use this file except in compliance
8+
% with the License. You may obtain a copy of the License at
9+
%
10+
% http://www.apache.org/licenses/LICENSE-2.0
11+
%
12+
% Unless required by applicable law or agreed to in writing, software
13+
% distributed under the License is distributed on an "AS IS" BASIS,
14+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
15+
% implied. See the License for the specific language governing
16+
% permissions and limitations under the License.
17+
18+
classdef tFromMATLAB < matlab.unittest.TestCase
% Test suite for the static method arrow.array.ListArray.fromMATLAB.

    methods (Test)

        function EmptyCellArrayError(testCase)
            % An empty cell array must be rejected with the error
            % "MATLAB:validators:mustBeNonempty".
            convert = @() arrow.array.ListArray.fromMATLAB({});
            testCase.verifyError(convert, "MATLAB:validators:mustBeNonempty");
        end

        function MustBeCellArrayError(testCase)
            % An input that is not a cell array must be rejected with
            % the error "MATLAB:validation:UnableToConvert".
            convert = @() arrow.array.ListArray.fromMATLAB('a');
            testCase.verifyError(convert, "MATLAB:validation:UnableToConvert");
        end

        function AllMissingCellArrayError(testCase)
            % A cell array holding nothing but missing values must be
            % rejected with "arrow:array:list:CellArrayAllMissing".
            allMissing = {missing missing missing};
            convert = @() arrow.array.ListArray.fromMATLAB(allMissing);
            testCase.verifyError(convert, "arrow:array:list:CellArrayAllMissing");
        end

        function ListOfFloat64(testCase)
            % Numeric (double) cells produce a ListArray whose Values
            % property is a Float64Array.
            cellInput = {[1 2 3], [4 5], missing, [6 7 8], [], [9 10]};
            actualArray = arrow.array.ListArray.fromMATLAB(cellInput);

            expectedOffsets = arrow.array(int32([0 3 5 5 8 8 10]));
            expectedValues = arrow.array(1:10);
            expectedArray = arrow.array.ListArray.fromArrays( ...
                expectedOffsets, expectedValues, Valid=[1 2 4 5 6]);

            testCase.verifyEqual(actualArray, expectedArray);
        end

        function ListOfStruct(testCase)
            % Table cells produce a ListArray whose Values property is
            % a StructArray.

            % The table's variable names come from these workspace
            % variable names, so they are kept as-is.
            Number = (1:10)';
            Text = compose("Test%d", (1:10)');
            Date = datetime(2023, 11, 2) + days(0:9)';
            tbl = table(Number, Text, Date);

            cellInput = {missing, tbl(1:3, :), tbl(4, :), tbl(1:0, :), tbl(5:10, :), missing};
            actualArray = arrow.array.ListArray.fromMATLAB(cellInput);

            expectedOffsets = arrow.array(int32([0 0 3 4 4 10 10]));
            expectedValues = arrow.array(tbl);
            expectedArray = arrow.array.ListArray.fromArrays( ...
                expectedOffsets, expectedValues, Valid=[2 3 4 5]);

            testCase.verifyEqual(actualArray, expectedArray);
        end

        function ListOfListOfString(testCase)
            % Nested cell arrays produce a ListArray whose Values
            % property is itself a ListArray.
            firstRow = {["A" "B"], ["C" "D" "E"] missing};
            secondRow = missing;
            thirdRow = {"F" ["G" "H" "I"]};
            cellInput = {firstRow, secondRow thirdRow};
            actualArray = arrow.array.ListArray.fromMATLAB(cellInput);

            % Expected inner list: the five inner cells of the first and
            % third rows.
            innerValues = arrow.array(["A" "B" "C" "D" "E" "F" "G" "H" "I"]);
            innerOffsets = arrow.array(int32([0 2 5 5 6 9]));
            innerList = arrow.array.ListArray.fromArrays( ...
                innerOffsets, innerValues, Valid=[1 2 4 5]);

            % Expected outer list: three rows, the second of which is
            % null.
            outerOffsets = arrow.array(int32([0 3 3 5]));
            expectedArray = arrow.array.ListArray.fromArrays( ...
                outerOffsets, innerList, Valid=[1 3]);

            testCase.verifyEqual(actualArray, expectedArray);
        end

        function OnlyEmptyElement(testCase)
            % A cell array whose elements are all empty durations
            % produces a ListArray of four empty lists.
            cellInput = repmat({duration.empty(0, 0)}, 1, 4);
            actualArray = arrow.array.ListArray.fromMATLAB(cellInput);

            expectedOffsets = arrow.array(int32([0 0 0 0 0]));
            expectedValues = arrow.array(duration.empty);
            expectedArray = arrow.array.ListArray.fromArrays(expectedOffsets, expectedValues);

            testCase.verifyEqual(actualArray, expectedArray);
        end

        function CellOfEmptyCell(testCase)
            % A cell array containing only an empty cell array produces
            % a ListArray whose Values property is a StringArray.
            cellInput = {{}};
            actualArray = arrow.array.ListArray.fromMATLAB(cellInput);

            expectedOffsets = arrow.array(int32([0 0]));
            expectedValues = arrow.array(string.empty);
            expectedArray = arrow.array.ListArray.fromArrays(expectedOffsets, expectedValues);

            testCase.verifyEqual(actualArray, expectedArray);
        end

        function CellOfMatrices(testCase)
            % Matrix-valued cells are accepted: each matrix is flattened
            % to a column vector (column-major order) before the values
            % are concatenated.
            cellInput = {[1 2 3; 4 5 6], [7 8; 9 10], 11};
            actualArray = arrow.array.ListArray.fromMATLAB(cellInput);

            expectedOffsets = arrow.array(int32([0 6 10 11]));
            expectedValues = arrow.array([1 4 2 5 3 6 7 9 8 10 11]);
            expectedArray = arrow.array.ListArray.fromArrays(expectedOffsets, expectedValues);

            testCase.verifyEqual(actualArray, expectedArray);
        end

        function ClassTypeMismatchError(testCase)
            % Cells of differing class types must be rejected with
            % "arrow:array:list:ClassTypeMismatch".
            mixedClasses = {1, [2 3 4], "A", 5};
            convert = @() arrow.array.ListArray.fromMATLAB(mixedClasses);
            testCase.verifyError(convert, "arrow:array:list:ClassTypeMismatch");
        end

        function VariableNamesMismatchError(testCase)
            % Tables whose variable names differ must be rejected with
            % "arrow:array:list:VariableNamesMismatch".
            mismatchedTables = {table(1, "A"), table(2, "B", VariableNames=["X", "Y"])};
            convert = @() arrow.array.ListArray.fromMATLAB(mismatchedTables);
            testCase.verifyError(convert, "arrow:array:list:VariableNamesMismatch");
        end

        function ExpectedZonedDatetimeError(testCase)
            % A zoned datetime followed by an unzoned one must be
            % rejected with "arrow:array:list:ExpectedZonedDatetime".
            zonedThenUnzoned = {datetime(2023, 11, 1, TimeZone="UTC"), datetime(2023, 11, 2)};
            convert = @() arrow.array.ListArray.fromMATLAB(zonedThenUnzoned);
            testCase.verifyError(convert, "arrow:array:list:ExpectedZonedDatetime");
        end

        function ExpectedUnzonedDatetimeError(testCase)
            % An unzoned datetime followed by a zoned one must be
            % rejected with "arrow:array:list:ExpectedUnzonedDatetime".
            unzonedThenZoned = {datetime(2023, 11, 1), datetime(2023, 11, 2, TimeZone="UTC")};
            convert = @() arrow.array.ListArray.fromMATLAB(unzonedThenZoned);
            testCase.verifyError(convert, "arrow:array:list:ExpectedUnzonedDatetime");
        end

    end

end

matlab/test/arrow/array/tArray.m

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@
3333
{datetime(2022, 1, 1), "arrow.array.TimestampArray"}, ...
3434
{seconds([1 2]), "arrow.array.Time64Array"}, ...
3535
{["A" "B"], "arrow.array.StringArray"}, ...
36-
{table(["A" "B"]'), "arrow.array.StructArray"}};
36+
{table(["A" "B"]'), "arrow.array.StructArray"}, ...
37+
{{[1, 2, 3], [4, 5]}, "arrow.array.ListArray"}};
3738
end
3839

3940
methods(Test)
@@ -51,7 +52,7 @@ function UnsupportedMATLABTypeError(testCase)
5152
% Verify arrow.array throws an error with the identifier
5253
% "arrow:array:UnsupportedMATLABType" if the input array is not one
5354
% we support converting into an Arrow array.
54-
matlabArray = {table};
55+
matlabArray = calmonths(12);
5556
fcn = @() arrow.array(matlabArray);
5657
errID = "arrow:array:UnsupportedMATLABType";
5758
testCase.verifyError(fcn, errID);

matlab/test/arrow/type/traits/tListTraits.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
ArrayConstructor = @arrow.array.ListArray
2121
ArrayClassName = "arrow.array.ListArray"
2222
ArrayProxyClassName = "arrow.array.proxy.ListArray"
23-
ArrayStaticConstructor = missing
23+
ArrayStaticConstructor = @arrow.array.ListArray.fromMATLAB
2424
TypeConstructor = @arrow.type.ListType
2525
TypeClassName = "arrow.type.ListType"
2626
TypeProxyClassName = "arrow.type.proxy.ListType"

0 commit comments

Comments
 (0)