Commit 442ced0

mrkn authored and pitrou committed
ARROW-4320: [C++] Add tests for non-contiguous tensors
I would like to add some test cases for tensors with non-contiguous strides.

Author: Kenta Murata <mrkn@mrkn.jp>
Author: Antoine Pitrou <antoine@python.org>

Closes #3453 from mrkn/add_strided_numeric_tensor_tests and squashes the following commits:

5841794 <Antoine Pitrou> Remove ARROW_EXPORT from template function definition.
53179ea <Kenta Murata> Fix compilation error
f153e0b <Kenta Murata> make format
8cfac94 <Kenta Murata> Refactoring: extract AssertNumericDataEqual
20c8728 <Kenta Murata> Refactoring: extract AssertCOOIndex function
c9767f7 <Kenta Murata> Add assertions to examine the strides layout
4cfef06 <Kenta Murata> Add a new test of csr sparse matrix creation from non-contiguous tensor
d9f32f1 <Kenta Murata> Add a new test of coo sparse tensor creation from non-contiguous tensor
0c1573c <Kenta Murata> Fix NumericTensor tests
1 parent 823dd43 commit 442ced0
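
For reference, the non-contiguous layout exercised by the new tests can be summarized with plain stride arithmetic: they wrap an int64 buffer as a tensor of shape {2, 3, 4} with byte strides {192, 64, 16}, whereas a packed row-major layout of that shape would use {96, 32, 8}, so every logical element is followed by one padding element. A minimal sketch of the offset computation (ordinary C++, not part of this patch; shape, strides, and index values are taken from the tests below):

    #include <cstdint>
    #include <vector>

    // Byte offset of a dense index in a strided buffer: sum(index[i] * strides[i]).
    // For index {1, 2, 1} with strides {192, 64, 16} this is 336 bytes, i.e.
    // element 42 of the int64 buffer, which holds 15 in the test data.
    int64_t ByteOffset(const std::vector<int64_t>& index,
                       const std::vector<int64_t>& strides) {
      int64_t offset = 0;
      for (size_t i = 0; i < index.size(); ++i) {
        offset += index[i] * strides[i];
      }
      return offset;
    }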

File tree

3 files changed: +131 −85 lines


cpp/src/arrow/sparse_tensor-test.cc

Lines changed: 89 additions & 70 deletions
@@ -38,6 +38,15 @@ static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected,
   ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id());
 }
 
+static inline void AssertCOOIndex(
+    const std::shared_ptr<SparseCOOIndex::CoordsTensor>& sidx, const int64_t nth,
+    const std::vector<int64_t>& expected_values) {
+  int64_t n = static_cast<int64_t>(expected_values.size());
+  for (int64_t i = 0; i < n; ++i) {
+    ASSERT_EQ(expected_values[i], sidx->Value({nth, i}));
+  }
+}
+
 TEST(TestSparseCOOTensor, CreationEmptyTensor) {
   std::vector<int64_t> shape = {2, 3, 4};
   SparseTensorImpl<SparseCOOIndex> st1(int64(), shape);
@@ -84,13 +93,8 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
   ASSERT_EQ("", st1.dim_name(1));
   ASSERT_EQ("", st1.dim_name(2));
 
-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
 
   const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
   ASSERT_EQ(std::string("SparseCOOIndex"), si.ToString());
@@ -99,30 +103,11 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
 
-  // (0, 0, 0) -> 1
-  ASSERT_EQ(0, sidx->Value({0, 0}));
-  ASSERT_EQ(0, sidx->Value({0, 1}));
-  ASSERT_EQ(0, sidx->Value({0, 2}));
-
-  // (0, 0, 2) -> 2
-  ASSERT_EQ(0, sidx->Value({1, 0}));
-  ASSERT_EQ(0, sidx->Value({1, 1}));
-  ASSERT_EQ(2, sidx->Value({1, 2}));
-
-  // (0, 1, 1) -> 3
-  ASSERT_EQ(0, sidx->Value({2, 0}));
-  ASSERT_EQ(1, sidx->Value({2, 1}));
-  ASSERT_EQ(1, sidx->Value({2, 2}));
-
-  // (1, 2, 1) -> 15
-  ASSERT_EQ(1, sidx->Value({10, 0}));
-  ASSERT_EQ(2, sidx->Value({10, 1}));
-  ASSERT_EQ(1, sidx->Value({10, 2}));
-
-  // (1, 2, 3) -> 16
-  ASSERT_EQ(1, sidx->Value({11, 0}));
-  ASSERT_EQ(2, sidx->Value({11, 1}));
-  ASSERT_EQ(3, sidx->Value({11, 2}));
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
 }
 
 TEST(TestSparseCOOTensor, CreationFromTensor) {
@@ -147,43 +132,47 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
   ASSERT_EQ("", st1.dim_name(1));
   ASSERT_EQ("", st1.dim_name(2));
 
-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
 
   const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
   std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
 
-  // (0, 0, 0) -> 1
-  ASSERT_EQ(0, sidx->Value({0, 0}));
-  ASSERT_EQ(0, sidx->Value({0, 1}));
-  ASSERT_EQ(0, sidx->Value({0, 2}));
-
-  // (0, 0, 2) -> 2
-  ASSERT_EQ(0, sidx->Value({1, 0}));
-  ASSERT_EQ(0, sidx->Value({1, 1}));
-  ASSERT_EQ(2, sidx->Value({1, 2}));
-
-  // (0, 1, 1) -> 3
-  ASSERT_EQ(0, sidx->Value({2, 0}));
-  ASSERT_EQ(1, sidx->Value({2, 1}));
-  ASSERT_EQ(1, sidx->Value({2, 2}));
-
-  // (1, 2, 1) -> 15
-  ASSERT_EQ(1, sidx->Value({10, 0}));
-  ASSERT_EQ(2, sidx->Value({10, 1}));
-  ASSERT_EQ(1, sidx->Value({10, 2}));
-
-  // (1, 2, 3) -> 16
-  ASSERT_EQ(1, sidx->Value({11, 0}));
-  ASSERT_EQ(2, sidx->Value({11, 1}));
-  ASSERT_EQ(3, sidx->Value({11, 2}));
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
+}
+
+TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) {
+  std::vector<int64_t> shape = {2, 3, 4};
+  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+  std::vector<int64_t> strides = {192, 64, 16};
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, strides);
+  SparseTensorImpl<SparseCOOIndex> st(tensor);
+
+  ASSERT_EQ(12, st.non_zero_length());
+  ASSERT_TRUE(st.is_mutable());
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st.sparse_index());
+  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
+  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
+  ASSERT_TRUE(sidx->is_column_major());
+
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
 }
 
 TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
@@ -211,16 +200,10 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   ASSERT_EQ("", st1.dim_name(1));
   ASSERT_EQ("", st1.dim_name(2));
 
-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
 
   const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index());
-
   ASSERT_EQ(std::string("SparseCSRIndex"), si.ToString());
   ASSERT_EQ(1, si.indptr()->ndim());
   ASSERT_EQ(1, si.indices()->ndim());
@@ -241,4 +224,40 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
 }
 
+TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) {
+  std::vector<int64_t> shape = {6, 4};
+  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+  std::vector<int64_t> strides = {64, 16};
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, strides);
+  SparseTensorImpl<SparseCSRIndex> st(tensor);
+
+  ASSERT_EQ(12, st.non_zero_length());
+  ASSERT_TRUE(st.is_mutable());
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st.sparse_index());
+  ASSERT_EQ(1, si.indptr()->ndim());
+  ASSERT_EQ(1, si.indices()->ndim());
+
+  const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
+  std::vector<int64_t> indptr_values(indptr_begin,
+                                     indptr_begin + si.indptr()->shape()[0]);
+
+  ASSERT_EQ(7, indptr_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
+
+  const int64_t* indices_begin =
+      reinterpret_cast<const int64_t*>(si.indices()->raw_data());
+  std::vector<int64_t> indices_values(indices_begin,
+                                      indices_begin + si.indices()->shape()[0]);
+
+  ASSERT_EQ(12, indices_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
+}
+
 } // namespace arrow
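
The CSR expectations in the last hunk follow from the strided layout: with shape {6, 4} and byte strides {64, 16} on int64 data, element (r, c) lives at buffer index 8r + 2c, so each row has exactly two non-zeros, at columns {0, 2} for even rows and {1, 3} for odd rows — which is what indptr {0, 2, 4, 6, 8, 10, 12} and indices {0, 2, 1, 3, ...} encode. A hedged sketch of how such a CSR index is read back (generic C++ with a hypothetical helper name, not the Arrow SparseCSRIndex API):

    #include <cstdint>
    #include <vector>

    // Visits the non-zeros of one CSR row: positions indptr[r] .. indptr[r + 1] - 1
    // index into both 'indices' (column numbers) and 'data' (values). With the
    // arrays asserted above, row 5 yields (5, 1) -> 15 and (5, 3) -> 16.
    template <typename Visitor>
    void VisitCSRRow(int64_t r, const std::vector<int64_t>& indptr,
                     const std::vector<int64_t>& indices,
                     const std::vector<int64_t>& data, Visitor visit) {
      for (int64_t i = indptr[r]; i < indptr[r + 1]; ++i) {
        visit(r, indices[i], data[i]);
      }
    }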

cpp/src/arrow/tensor-test.cc

Lines changed: 33 additions & 15 deletions
@@ -104,13 +104,16 @@ TEST(TestTensor, ZeroDimensionalTensor) {
   ASSERT_EQ(t.strides().size(), 1);
 }
 
-TEST(TestNumericTensor, ElementAccess) {
+TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
   std::vector<int64_t> shape = {3, 4};
 
   std::vector<int64_t> values_i64 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
   std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
   NumericTensor<Int64Type> t_i64(buffer_i64, shape);
 
+  ASSERT_TRUE(t_i64.is_row_major());
+  ASSERT_FALSE(t_i64.is_column_major());
+  ASSERT_TRUE(t_i64.is_contiguous());
   ASSERT_EQ(1, t_i64.Value({0, 0}));
   ASSERT_EQ(5, t_i64.Value({1, 0}));
   ASSERT_EQ(6, t_i64.Value({1, 1}));
@@ -121,22 +124,27 @@
   std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
   NumericTensor<FloatType> t_f32(buffer_f32, shape);
 
+  ASSERT_TRUE(t_f32.is_row_major());
+  ASSERT_FALSE(t_f32.is_column_major());
+  ASSERT_TRUE(t_f32.is_contiguous());
   ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
   ASSERT_EQ(5.1f, t_f32.Value({1, 0}));
   ASSERT_EQ(6.1f, t_f32.Value({1, 1}));
   ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
 }
 
-TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
+TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
   std::vector<int64_t> shape = {3, 4};
 
   const int64_t i64_size = sizeof(int64_t);
-  std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7,
-                                     8, 0, 0, 9, 10, 11, 12, 0, 0};
-  std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};
+  std::vector<int64_t> values_i64 = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12};
+  std::vector<int64_t> strides_i64 = {i64_size, i64_size * 3};
   std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
   NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
 
+  ASSERT_TRUE(t_i64.is_column_major());
+  ASSERT_FALSE(t_i64.is_row_major());
+  ASSERT_TRUE(t_i64.is_contiguous());
   ASSERT_EQ(1, t_i64.Value({0, 0}));
   ASSERT_EQ(2, t_i64.Value({0, 1}));
   ASSERT_EQ(4, t_i64.Value({0, 3}));
@@ -145,13 +153,15 @@ TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
   ASSERT_EQ(11, t_i64.Value({2, 2}));
 
   const int64_t f32_size = sizeof(float);
-  std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f,
-                                   5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f,
-                                   9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
-  std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};
+  std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f, 2.1f, 6.1f, 10.1f,
+                                   3.1f, 7.1f, 11.1f, 4.1f, 8.1f, 12.1f};
+  std::vector<int64_t> strides_f32 = {f32_size, f32_size * 3};
   std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
   NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
 
+  ASSERT_TRUE(t_f32.is_column_major());
+  ASSERT_FALSE(t_f32.is_row_major());
+  ASSERT_TRUE(t_f32.is_contiguous());
   ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
   ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
   ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
@@ -160,15 +170,19 @@
   ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
 }
 
-TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
+TEST(TestNumericTensor, ElementAccessWithNonContiguousStrides) {
   std::vector<int64_t> shape = {3, 4};
 
   const int64_t i64_size = sizeof(int64_t);
-  std::vector<int64_t> values_i64 = {1, 5, 9, 0, 2, 6, 10, 0, 3, 7, 11, 0, 4, 8, 12, 0};
-  std::vector<int64_t> strides_i64 = {i64_size, i64_size * 4};
+  std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7,
+                                     8, 0, 0, 9, 10, 11, 12, 0, 0};
+  std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};
   std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
   NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
 
+  ASSERT_FALSE(t_i64.is_contiguous());
+  ASSERT_FALSE(t_i64.is_row_major());
+  ASSERT_FALSE(t_i64.is_column_major());
   ASSERT_EQ(1, t_i64.Value({0, 0}));
   ASSERT_EQ(2, t_i64.Value({0, 1}));
   ASSERT_EQ(4, t_i64.Value({0, 3}));
@@ -177,12 +191,16 @@ TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
   ASSERT_EQ(11, t_i64.Value({2, 2}));
 
   const int64_t f32_size = sizeof(float);
-  std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f, 0.0f, 2.1f, 6.1f, 10.1f, 0.0f,
-                                   3.1f, 7.1f, 11.1f, 0.0f, 4.1f, 8.1f, 12.1f, 0.0f};
-  std::vector<int64_t> strides_f32 = {f32_size, f32_size * 4};
+  std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f,
+                                   5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f,
+                                   9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
+  std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};
   std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
   NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
 
+  ASSERT_FALSE(t_f32.is_contiguous());
+  ASSERT_FALSE(t_f32.is_row_major());
+  ASSERT_FALSE(t_f32.is_column_major());
   ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
   ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
   ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
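
The renamed tensor tests distinguish three layouts of the same logical 3 x 4 matrix: packed row-major strides {32, 8}, packed column-major strides {8, 24}, and the non-contiguous {48, 8} (six stored int64 elements per row, two of them padding), which satisfies none of the three predicates. A small sketch of the packed-stride arithmetic behind those expectations (an assumption about the bookkeeping, not the Arrow implementation):

    #include <cstdint>
    #include <vector>

    // Packed strides in bytes for a given shape; a tensor is contiguous when its
    // strides match one of these layouts.
    std::vector<int64_t> RowMajorStrides(const std::vector<int64_t>& shape,
                                         int64_t item_size) {
      std::vector<int64_t> strides(shape.size());
      int64_t step = item_size;
      for (size_t i = shape.size(); i-- > 0;) {
        strides[i] = step;  // last dimension varies fastest
        step *= shape[i];
      }
      return strides;
    }

    std::vector<int64_t> ColumnMajorStrides(const std::vector<int64_t>& shape,
                                            int64_t item_size) {
      std::vector<int64_t> strides(shape.size());
      int64_t step = item_size;
      for (size_t i = 0; i < shape.size(); ++i) {
        strides[i] = step;  // first dimension varies fastest
        step *= shape[i];
      }
      return strides;
    }

    // For shape {3, 4} and 8-byte int64: row-major {32, 8}, column-major {8, 24}.
    // The strides {48, 8} used in ElementAccessWithNonContiguousStrides match
    // neither, so is_contiguous(), is_row_major(), and is_column_major() are all
    // expected to return false.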

cpp/src/arrow/test-util.h

Lines changed: 9 additions & 0 deletions
@@ -202,6 +202,15 @@ ARROW_EXPORT void PrintColumn(const Column& col, std::stringstream* ss);
 ARROW_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
                                     bool same_chunk_layout = true);
 
+template <typename C_TYPE>
+void AssertNumericDataEqual(const C_TYPE* raw_data,
+                            const std::vector<C_TYPE>& expected_values) {
+  for (auto expected : expected_values) {
+    ASSERT_EQ(expected, *raw_data);
+    ++raw_data;
+  }
+}
+
 ARROW_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right);
 
 // Check if the padding of the buffers of the array is zero.
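
This template replaces the pair of index loops the sparse tensor tests previously used for raw-buffer comparison; per the squashed commits, ARROW_EXPORT was dropped from the definition since it is a header-only template. A typical call site, as it appears in the updated tests:

    const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
    AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});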
