Commit 442ced0

mrkn authored and pitrou committed
ARROW-4320: [C++] Add tests for non-contiguous tensors
I would like to add some test cases for tensors with non-contiguous strides.

Author: Kenta Murata <mrkn@mrkn.jp>
Author: Antoine Pitrou <antoine@python.org>

Closes #3453 from mrkn/add_strided_numeric_tensor_tests and squashes the following commits:

5841794 <Antoine Pitrou> Remove ARROW_EXPORT from template function definition.
53179ea <Kenta Murata> Fix compilation error
f153e0b <Kenta Murata> make format
8cfac94 <Kenta Murata> Refactoring: extract AssertNumericDataEqual
20c8728 <Kenta Murata> Refactoring: extract AssertCOOIndex function
c9767f7 <Kenta Murata> Add assertions to examine the strides layout
4cfef06 <Kenta Murata> Add a new test of csr sparse matrix creation from non-contiguous tensor
d9f32f1 <Kenta Murata> Add a new test of coo sparse tensor creation from non-contiguous tensor
0c1573c <Kenta Murata> Fix NumericTensor tests
1 parent 823dd43 commit 442ced0
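
For reference, the non-contiguous layout exercised by the new tests can be summarized with plain stride arithmetic: they wrap an int64 buffer as a tensor of shape {2, 3, 4} with byte strides {192, 64, 16}, whereas a packed row-major layout of that shape would use {96, 32, 8}, so every logical element is followed by one padding element. A minimal sketch of the offset computation (ordinary C++, not part of this patch; shape, strides, and index values are taken from the tests below):

    #include <cstdint>
    #include <vector>

    // Byte offset of a dense index in a strided buffer: sum(index[i] * strides[i]).
    // For index {1, 2, 1} with strides {192, 64, 16} this is 336 bytes, i.e.
    // element 42 of the int64 buffer, which holds 15 in the test data.
    int64_t ByteOffset(const std::vector<int64_t>& index,
                       const std::vector<int64_t>& strides) {
      int64_t offset = 0;
      for (size_t i = 0; i < index.size(); ++i) {
        offset += index[i] * strides[i];
      }
      return offset;
    }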

File tree

3 files changed: +131 −85 lines


cpp/src/arrow/sparse_tensor-test.cc

Lines changed: 89 additions & 70 deletions
@@ -38,6 +38,15 @@ static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected,
   ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id());
 }
 
+static inline void AssertCOOIndex(
+    const std::shared_ptr<SparseCOOIndex::CoordsTensor>& sidx, const int64_t nth,
+    const std::vector<int64_t>& expected_values) {
+  int64_t n = static_cast<int64_t>(expected_values.size());
+  for (int64_t i = 0; i < n; ++i) {
+    ASSERT_EQ(expected_values[i], sidx->Value({nth, i}));
+  }
+}
+
 TEST(TestSparseCOOTensor, CreationEmptyTensor) {
   std::vector<int64_t> shape = {2, 3, 4};
   SparseTensorImpl<SparseCOOIndex> st1(int64(), shape);
@@ -84,13 +93,8 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
   ASSERT_EQ("", st1.dim_name(1));
   ASSERT_EQ("", st1.dim_name(2));
 
-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
 
   const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
   ASSERT_EQ(std::string("SparseCOOIndex"), si.ToString());
@@ -99,30 +103,11 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
 
-  // (0, 0, 0) -> 1
-  ASSERT_EQ(0, sidx->Value({0, 0}));
-  ASSERT_EQ(0, sidx->Value({0, 1}));
-  ASSERT_EQ(0, sidx->Value({0, 2}));
-
-  // (0, 0, 2) -> 2
-  ASSERT_EQ(0, sidx->Value({1, 0}));
-  ASSERT_EQ(0, sidx->Value({1, 1}));
-  ASSERT_EQ(2, sidx->Value({1, 2}));
-
-  // (0, 1, 1) -> 3
-  ASSERT_EQ(0, sidx->Value({2, 0}));
-  ASSERT_EQ(1, sidx->Value({2, 1}));
-  ASSERT_EQ(1, sidx->Value({2, 2}));
-
-  // (1, 2, 1) -> 15
-  ASSERT_EQ(1, sidx->Value({10, 0}));
-  ASSERT_EQ(2, sidx->Value({10, 1}));
-  ASSERT_EQ(1, sidx->Value({10, 2}));
-
-  // (1, 2, 3) -> 16
-  ASSERT_EQ(1, sidx->Value({11, 0}));
-  ASSERT_EQ(2, sidx->Value({11, 1}));
-  ASSERT_EQ(3, sidx->Value({11, 2}));
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
 }
 
 TEST(TestSparseCOOTensor, CreationFromTensor) {
@@ -147,43 +132,47 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
   ASSERT_EQ("", st1.dim_name(1));
   ASSERT_EQ("", st1.dim_name(2));
 
-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
 
   const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
   std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
 
-  // (0, 0, 0) -> 1
-  ASSERT_EQ(0, sidx->Value({0, 0}));
-  ASSERT_EQ(0, sidx->Value({0, 1}));
-  ASSERT_EQ(0, sidx->Value({0, 2}));
-
-  // (0, 0, 2) -> 2
-  ASSERT_EQ(0, sidx->Value({1, 0}));
-  ASSERT_EQ(0, sidx->Value({1, 1}));
-  ASSERT_EQ(2, sidx->Value({1, 2}));
-
-  // (0, 1, 1) -> 3
-  ASSERT_EQ(0, sidx->Value({2, 0}));
-  ASSERT_EQ(1, sidx->Value({2, 1}));
-  ASSERT_EQ(1, sidx->Value({2, 2}));
-
-  // (1, 2, 1) -> 15
-  ASSERT_EQ(1, sidx->Value({10, 0}));
-  ASSERT_EQ(2, sidx->Value({10, 1}));
-  ASSERT_EQ(1, sidx->Value({10, 2}));
-
-  // (1, 2, 3) -> 16
-  ASSERT_EQ(1, sidx->Value({11, 0}));
-  ASSERT_EQ(2, sidx->Value({11, 1}));
-  ASSERT_EQ(3, sidx->Value({11, 2}));
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
+}
+
+TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) {
+  std::vector<int64_t> shape = {2, 3, 4};
+  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+  std::vector<int64_t> strides = {192, 64, 16};
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, strides);
+  SparseTensorImpl<SparseCOOIndex> st(tensor);
+
+  ASSERT_EQ(12, st.non_zero_length());
+  ASSERT_TRUE(st.is_mutable());
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st.sparse_index());
+  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
+  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
+  ASSERT_TRUE(sidx->is_column_major());
+
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
 }
 
 TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
@@ -211,16 +200,10 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   ASSERT_EQ("", st1.dim_name(1));
   ASSERT_EQ("", st1.dim_name(2));
 
-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
 
   const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index());
-
   ASSERT_EQ(std::string("SparseCSRIndex"), si.ToString());
   ASSERT_EQ(1, si.indptr()->ndim());
   ASSERT_EQ(1, si.indices()->ndim());
@@ -241,4 +224,40 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
 }
 
+TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) {
+  std::vector<int64_t> shape = {6, 4};
+  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+  std::vector<int64_t> strides = {64, 16};
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, strides);
+  SparseTensorImpl<SparseCSRIndex> st(tensor);
+
+  ASSERT_EQ(12, st.non_zero_length());
+  ASSERT_TRUE(st.is_mutable());
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st.sparse_index());
+  ASSERT_EQ(1, si.indptr()->ndim());
+  ASSERT_EQ(1, si.indices()->ndim());
+
+  const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
+  std::vector<int64_t> indptr_values(indptr_begin,
+                                     indptr_begin + si.indptr()->shape()[0]);
+
+  ASSERT_EQ(7, indptr_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
+
+  const int64_t* indices_begin =
+      reinterpret_cast<const int64_t*>(si.indices()->raw_data());
+  std::vector<int64_t> indices_values(indices_begin,
+                                      indices_begin + si.indices()->shape()[0]);
+
+  ASSERT_EQ(12, indices_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
+}
+
 } // namespace arrow
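
The CSR expectations in the last hunk follow from the strided layout: with shape {6, 4} and byte strides {64, 16} on int64 data, element (r, c) lives at buffer index 8r + 2c, so each row has exactly two non-zeros, at columns {0, 2} for even rows and {1, 3} for odd rows — which is what indptr {0, 2, 4, 6, 8, 10, 12} and indices {0, 2, 1, 3, ...} encode. A hedged sketch of how such a CSR index is read back (generic C++ with a hypothetical helper name, not the Arrow SparseCSRIndex API):

    #include <cstdint>
    #include <vector>

    // Visits the non-zeros of one CSR row: positions indptr[r] .. indptr[r + 1] - 1
    // index into both 'indices' (column numbers) and 'data' (values). With the
    // arrays asserted above, row 5 yields (5, 1) -> 15 and (5, 3) -> 16.
    template <typename Visitor>
    void VisitCSRRow(int64_t r, const std::vector<int64_t>& indptr,
                     const std::vector<int64_t>& indices,
                     const std::vector<int64_t>& data, Visitor visit) {
      for (int64_t i = indptr[r]; i < indptr[r + 1]; ++i) {
        visit(r, indices[i], data[i]);
      }
    }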

cpp/src/arrow/tensor-test.cc

Lines changed: 33 additions & 15 deletions
@@ -104,13 +104,16 @@ TEST(TestTensor, ZeroDimensionalTensor) {
   ASSERT_EQ(t.strides().size(), 1);
 }
 
-TEST(TestNumericTensor, ElementAccess) {
+TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
   std::vector<int64_t> shape = {3, 4};
 
   std::vector<int64_t> values_i64 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
   std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
   NumericTensor<Int64Type> t_i64(buffer_i64, shape);
 
+  ASSERT_TRUE(t_i64.is_row_major());
+  ASSERT_FALSE(t_i64.is_column_major());
+  ASSERT_TRUE(t_i64.is_contiguous());
   ASSERT_EQ(1, t_i64.Value({0, 0}));
   ASSERT_EQ(5, t_i64.Value({1, 0}));
   ASSERT_EQ(6, t_i64.Value({1, 1}));
@@ -121,22 +124,27 @@
   std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
   NumericTensor<FloatType> t_f32(buffer_f32, shape);
 
+  ASSERT_TRUE(t_f32.is_row_major());
+  ASSERT_FALSE(t_f32.is_column_major());
+  ASSERT_TRUE(t_f32.is_contiguous());
   ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
   ASSERT_EQ(5.1f, t_f32.Value({1, 0}));
   ASSERT_EQ(6.1f, t_f32.Value({1, 1}));
   ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
 }
 
-TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
+TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
   std::vector<int64_t> shape = {3, 4};
 
   const int64_t i64_size = sizeof(int64_t);
-  std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7,
-                                     8, 0, 0, 9, 10, 11, 12, 0, 0};
-  std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};
+  std::vector<int64_t> values_i64 = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12};
+  std::vector<int64_t> strides_i64 = {i64_size, i64_size * 3};
   std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
   NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
 
+  ASSERT_TRUE(t_i64.is_column_major());
+  ASSERT_FALSE(t_i64.is_row_major());
+  ASSERT_TRUE(t_i64.is_contiguous());
   ASSERT_EQ(1, t_i64.Value({0, 0}));
   ASSERT_EQ(2, t_i64.Value({0, 1}));
   ASSERT_EQ(4, t_i64.Value({0, 3}));
@@ -145,13 +153,15 @@ TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
   ASSERT_EQ(11, t_i64.Value({2, 2}));
 
   const int64_t f32_size = sizeof(float);
-  std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f,
-                                   5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f,
-                                   9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
-  std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};
+  std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f, 2.1f, 6.1f, 10.1f,
+                                   3.1f, 7.1f, 11.1f, 4.1f, 8.1f, 12.1f};
+  std::vector<int64_t> strides_f32 = {f32_size, f32_size * 3};
   std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
   NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
 
+  ASSERT_TRUE(t_f32.is_column_major());
+  ASSERT_FALSE(t_f32.is_row_major());
+  ASSERT_TRUE(t_f32.is_contiguous());
   ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
   ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
   ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
@@ -160,15 +170,19 @@
   ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
 }
 
-TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
+TEST(TestNumericTensor, ElementAccessWithNonContiguousStrides) {
   std::vector<int64_t> shape = {3, 4};
 
   const int64_t i64_size = sizeof(int64_t);
-  std::vector<int64_t> values_i64 = {1, 5, 9, 0, 2, 6, 10, 0, 3, 7, 11, 0, 4, 8, 12, 0};
-  std::vector<int64_t> strides_i64 = {i64_size, i64_size * 4};
+  std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7,
+                                     8, 0, 0, 9, 10, 11, 12, 0, 0};
+  std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};
   std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
   NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
 
+  ASSERT_FALSE(t_i64.is_contiguous());
+  ASSERT_FALSE(t_i64.is_row_major());
+  ASSERT_FALSE(t_i64.is_column_major());
   ASSERT_EQ(1, t_i64.Value({0, 0}));
   ASSERT_EQ(2, t_i64.Value({0, 1}));
   ASSERT_EQ(4, t_i64.Value({0, 3}));
@@ -177,12 +191,16 @@ TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
   ASSERT_EQ(11, t_i64.Value({2, 2}));
 
   const int64_t f32_size = sizeof(float);
-  std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f, 0.0f, 2.1f, 6.1f, 10.1f, 0.0f,
-                                   3.1f, 7.1f, 11.1f, 0.0f, 4.1f, 8.1f, 12.1f, 0.0f};
-  std::vector<int64_t> strides_f32 = {f32_size, f32_size * 4};
+  std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f,
+                                   5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f,
+                                   9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
+  std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};
   std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
   NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
 
+  ASSERT_FALSE(t_f32.is_contiguous());
+  ASSERT_FALSE(t_f32.is_row_major());
+  ASSERT_FALSE(t_f32.is_column_major());
   ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
   ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
   ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
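
The renamed tensor tests distinguish three layouts of the same logical 3 x 4 matrix: packed row-major strides {32, 8}, packed column-major strides {8, 24}, and the non-contiguous {48, 8} (six stored int64 elements per row, two of them padding), which satisfies none of the three predicates. A small sketch of the packed-stride arithmetic behind those expectations (an assumption about the bookkeeping, not the Arrow implementation):

    #include <cstdint>
    #include <vector>

    // Packed strides in bytes for a given shape; a tensor is contiguous when its
    // strides match one of these layouts.
    std::vector<int64_t> RowMajorStrides(const std::vector<int64_t>& shape,
                                         int64_t item_size) {
      std::vector<int64_t> strides(shape.size());
      int64_t step = item_size;
      for (size_t i = shape.size(); i-- > 0;) {
        strides[i] = step;  // last dimension varies fastest
        step *= shape[i];
      }
      return strides;
    }

    std::vector<int64_t> ColumnMajorStrides(const std::vector<int64_t>& shape,
                                            int64_t item_size) {
      std::vector<int64_t> strides(shape.size());
      int64_t step = item_size;
      for (size_t i = 0; i < shape.size(); ++i) {
        strides[i] = step;  // first dimension varies fastest
        step *= shape[i];
      }
      return strides;
    }

    // For shape {3, 4} and 8-byte int64: row-major {32, 8}, column-major {8, 24}.
    // The strides {48, 8} used in ElementAccessWithNonContiguousStrides match
    // neither, so is_contiguous(), is_row_major(), and is_column_major() are all
    // expected to return false.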

cpp/src/arrow/test-util.h

Lines changed: 9 additions & 0 deletions
@@ -202,6 +202,15 @@ ARROW_EXPORT void PrintColumn(const Column& col, std::stringstream* ss);
 ARROW_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
                                     bool same_chunk_layout = true);
 
+template <typename C_TYPE>
+void AssertNumericDataEqual(const C_TYPE* raw_data,
+                            const std::vector<C_TYPE>& expected_values) {
+  for (auto expected : expected_values) {
+    ASSERT_EQ(expected, *raw_data);
+    ++raw_data;
+  }
+}
+
 ARROW_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right);
 
 // Check if the padding of the buffers of the array is zero.
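
This template replaces the pair of index loops the sparse tensor tests previously used for raw-buffer comparison; per the squashed commits, ARROW_EXPORT was dropped from the definition since it is a header-only template. A typical call site, as it appears in the updated tests:

    const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
    AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});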
