159 changes: 89 additions & 70 deletions cpp/src/arrow/sparse_tensor-test.cc
@@ -38,6 +38,15 @@ static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected,
ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id());
}

+static inline void AssertCOOIndex(
+    const std::shared_ptr<SparseCOOIndex::CoordsTensor>& sidx, const int64_t nth,
+    const std::vector<int64_t>& expected_values) {
+  int64_t n = static_cast<int64_t>(expected_values.size());
+  for (int64_t i = 0; i < n; ++i) {
+    ASSERT_EQ(expected_values[i], sidx->Value({nth, i}));
+  }
+}
+
TEST(TestSparseCOOTensor, CreationEmptyTensor) {
std::vector<int64_t> shape = {2, 3, 4};
SparseTensorImpl<SparseCOOIndex> st1(int64(), shape);
@@ -84,13 +93,8 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
ASSERT_EQ("", st1.dim_name(1));
ASSERT_EQ("", st1.dim_name(2));

-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});

const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
ASSERT_EQ(std::string("SparseCOOIndex"), si.ToString());
@@ -99,30 +103,11 @@
ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
ASSERT_TRUE(sidx->is_column_major());

-  // (0, 0, 0) -> 1
-  ASSERT_EQ(0, sidx->Value({0, 0}));
-  ASSERT_EQ(0, sidx->Value({0, 1}));
-  ASSERT_EQ(0, sidx->Value({0, 2}));
-
-  // (0, 0, 2) -> 2
-  ASSERT_EQ(0, sidx->Value({1, 0}));
-  ASSERT_EQ(0, sidx->Value({1, 1}));
-  ASSERT_EQ(2, sidx->Value({1, 2}));
-
-  // (0, 1, 1) -> 3
-  ASSERT_EQ(0, sidx->Value({2, 0}));
-  ASSERT_EQ(1, sidx->Value({2, 1}));
-  ASSERT_EQ(1, sidx->Value({2, 2}));
-
-  // (1, 2, 1) -> 15
-  ASSERT_EQ(1, sidx->Value({10, 0}));
-  ASSERT_EQ(2, sidx->Value({10, 1}));
-  ASSERT_EQ(1, sidx->Value({10, 2}));
-
-  // (1, 2, 3) -> 16
-  ASSERT_EQ(1, sidx->Value({11, 0}));
-  ASSERT_EQ(2, sidx->Value({11, 1}));
-  ASSERT_EQ(3, sidx->Value({11, 2}));
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
}

TEST(TestSparseCOOTensor, CreationFromTensor) {
@@ -147,43 +132,47 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
ASSERT_EQ("", st1.dim_name(1));
ASSERT_EQ("", st1.dim_name(2));

-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});

const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
ASSERT_TRUE(sidx->is_column_major());

-  // (0, 0, 0) -> 1
-  ASSERT_EQ(0, sidx->Value({0, 0}));
-  ASSERT_EQ(0, sidx->Value({0, 1}));
-  ASSERT_EQ(0, sidx->Value({0, 2}));
-
-  // (0, 0, 2) -> 2
-  ASSERT_EQ(0, sidx->Value({1, 0}));
-  ASSERT_EQ(0, sidx->Value({1, 1}));
-  ASSERT_EQ(2, sidx->Value({1, 2}));
-
-  // (0, 1, 1) -> 3
-  ASSERT_EQ(0, sidx->Value({2, 0}));
-  ASSERT_EQ(1, sidx->Value({2, 1}));
-  ASSERT_EQ(1, sidx->Value({2, 2}));
-
-  // (1, 2, 1) -> 15
-  ASSERT_EQ(1, sidx->Value({10, 0}));
-  ASSERT_EQ(2, sidx->Value({10, 1}));
-  ASSERT_EQ(1, sidx->Value({10, 2}));
-
-  // (1, 2, 3) -> 16
-  ASSERT_EQ(1, sidx->Value({11, 0}));
-  ASSERT_EQ(2, sidx->Value({11, 1}));
-  ASSERT_EQ(3, sidx->Value({11, 2}));
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
}

+TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) {
+  std::vector<int64_t> shape = {2, 3, 4};
+  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+  std::vector<int64_t> strides = {192, 64, 16};
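[Note: strides are given in bytes. The innermost stride 16 = 2 * sizeof(int64_t), so the tensor visits every other slot of values, skipping the interleaved zeros; 64 = 4 * 16 steps one row of four elements, and 192 = 3 * 64 steps one plane.]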
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, strides);
+  SparseTensorImpl<SparseCOOIndex> st(tensor);
+
+  ASSERT_EQ(12, st.non_zero_length());
+  ASSERT_TRUE(st.is_mutable());
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st.sparse_index());
+  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
+  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
+  ASSERT_TRUE(sidx->is_column_major());
Review comment (Member): Interesting. Out of curiosity, why not row-major?

Reply (Member, author): It is for the zero-copy compatibility with SciPy and pydata/sparse. See this discussion.

[Note: a sketch of this zero-copy layout follows the end of this test.]
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
+}
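[Note: a minimal sketch of the zero-copy point from the review thread above, using hypothetical values that are not part of this diff. In a column-major (non_zero_length, ndim) coords tensor, each dimension's indices form one contiguous run, which is exactly how SciPy's coo_matrix stores its row and col arrays, so one side can wrap the other's memory without copying.]

    #include <cstdint>
    #include <vector>

    int main() {
      // Four nonzeros of a 2-D matrix as a (4, 2) coords tensor stored
      // column-major: all dim-0 indices first, then all dim-1 indices.
      std::vector<int64_t> coords = {0, 0, 1, 2,   // like scipy coo.row
                                     0, 2, 1, 3};  // like scipy coo.col
      const int64_t* rows = coords.data();      // zero-copy view of dimension 0
      const int64_t* cols = coords.data() + 4;  // zero-copy view of dimension 1
      return (rows[3] == 2 && cols[3] == 3) ? 0 : 1;
    }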

TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
@@ -211,16 +200,10 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
ASSERT_EQ("", st1.dim_name(1));
ASSERT_EQ("", st1.dim_name(2));

-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});

const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index());

ASSERT_EQ(std::string("SparseCSRIndex"), si.ToString());
ASSERT_EQ(1, si.indptr()->ndim());
ASSERT_EQ(1, si.indices()->ndim());
@@ -241,4 +224,40 @@
ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
}

+TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) {
+  std::vector<int64_t> shape = {6, 4};
+  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+  std::vector<int64_t> strides = {64, 16};
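[Note: as in the COO test above, strides are in bytes: 16 = 2 * sizeof(int64_t) skips the interleaved zeros, and 64 = 4 * 16 steps one logical row of four elements.]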
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, strides);
+  SparseTensorImpl<SparseCSRIndex> st(tensor);
+
+  ASSERT_EQ(12, st.non_zero_length());
+  ASSERT_TRUE(st.is_mutable());
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st.sparse_index());
+  ASSERT_EQ(1, si.indptr()->ndim());
+  ASSERT_EQ(1, si.indices()->ndim());
+
+  const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
+  std::vector<int64_t> indptr_values(indptr_begin,
+                                     indptr_begin + si.indptr()->shape()[0]);
+
+  ASSERT_EQ(7, indptr_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
Review comment (Member): This could benefit from an AssertNumericDataEqual helper or similar.

Reply (Member, author): Done.
+  const int64_t* indices_begin =
+      reinterpret_cast<const int64_t*>(si.indices()->raw_data());
+  std::vector<int64_t> indices_values(indices_begin,
+                                      indices_begin + si.indices()->shape()[0]);
+
+  ASSERT_EQ(12, indices_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
+}

} // namespace arrow
48 changes: 33 additions & 15 deletions cpp/src/arrow/tensor-test.cc
@@ -104,13 +104,16 @@ TEST(TestTensor, ZeroDimensionalTensor) {
ASSERT_EQ(t.strides().size(), 1);
}

-TEST(TestNumericTensor, ElementAccess) {
+TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
std::vector<int64_t> shape = {3, 4};

std::vector<int64_t> values_i64 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
NumericTensor<Int64Type> t_i64(buffer_i64, shape);

+  ASSERT_TRUE(t_i64.is_row_major());
Review comment (Member): Also ASSERT_FALSE(is_column_major()) and ASSERT_TRUE(is_contiguous())?

Reply (Member, author): Done.
+  ASSERT_FALSE(t_i64.is_column_major());
+  ASSERT_TRUE(t_i64.is_contiguous());
ASSERT_EQ(1, t_i64.Value({0, 0}));
ASSERT_EQ(5, t_i64.Value({1, 0}));
ASSERT_EQ(6, t_i64.Value({1, 1}));
@@ -121,22 +124,27 @@ TEST(TestNumericTensor, ElementAccess) {
std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
NumericTensor<FloatType> t_f32(buffer_f32, shape);

+  ASSERT_TRUE(t_f32.is_row_major());
+  ASSERT_FALSE(t_f32.is_column_major());
+  ASSERT_TRUE(t_f32.is_contiguous());
ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
ASSERT_EQ(5.1f, t_f32.Value({1, 0}));
ASSERT_EQ(6.1f, t_f32.Value({1, 1}));
ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
}

-TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
+TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
std::vector<int64_t> shape = {3, 4};

const int64_t i64_size = sizeof(int64_t);
-  std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7,
-                                     8, 0, 0, 9, 10, 11, 12, 0, 0};
-  std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};
+  std::vector<int64_t> values_i64 = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12};
+  std::vector<int64_t> strides_i64 = {i64_size, i64_size * 3};
std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);

+  ASSERT_TRUE(t_i64.is_column_major());
Review comment (Member): Same here.

Reply (Member, author): Done.
+  ASSERT_FALSE(t_i64.is_row_major());
+  ASSERT_TRUE(t_i64.is_contiguous());
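[Note: for shape {3, 4} with 8-byte elements, contiguous row-major strides would be {32, 8}; the {i64_size, i64_size * 3} = {8, 24} used here is the contiguous column-major layout.]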
ASSERT_EQ(1, t_i64.Value({0, 0}));
ASSERT_EQ(2, t_i64.Value({0, 1}));
ASSERT_EQ(4, t_i64.Value({0, 3}));
@@ -145,13 +153,15 @@ TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
ASSERT_EQ(11, t_i64.Value({2, 2}));

const int64_t f32_size = sizeof(float);
-  std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f,
-                                   5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f,
-                                   9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
-  std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};
+  std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f, 2.1f, 6.1f, 10.1f,
+                                   3.1f, 7.1f, 11.1f, 4.1f, 8.1f, 12.1f};
+  std::vector<int64_t> strides_f32 = {f32_size, f32_size * 3};
std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);

+  ASSERT_TRUE(t_f32.is_column_major());
+  ASSERT_FALSE(t_f32.is_row_major());
+  ASSERT_TRUE(t_f32.is_contiguous());
ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
@@ -160,15 +170,19 @@
ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
}

-TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
+TEST(TestNumericTensor, ElementAccessWithNonContiguousStrides) {
std::vector<int64_t> shape = {3, 4};

const int64_t i64_size = sizeof(int64_t);
-  std::vector<int64_t> values_i64 = {1, 5, 9, 0, 2, 6, 10, 0, 3, 7, 11, 0, 4, 8, 12, 0};
-  std::vector<int64_t> strides_i64 = {i64_size, i64_size * 4};
+  std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7,
+                                     8, 0, 0, 9, 10, 11, 12, 0, 0};
+  std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};
std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);

+  ASSERT_FALSE(t_i64.is_contiguous());
Review comment (Member): Also ASSERT_FALSE on the row_major / column_major properties?

Reply (Member, author): Done.
+  ASSERT_FALSE(t_i64.is_row_major());
+  ASSERT_FALSE(t_i64.is_column_major());
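[Note: here the row stride is i64_size * 6 = 48 bytes, but a densely packed row of four int64 values spans only 32; each stored row carries two trailing zero slots, so the tensor is neither contiguous, row-major, nor column-major.]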
ASSERT_EQ(1, t_i64.Value({0, 0}));
ASSERT_EQ(2, t_i64.Value({0, 1}));
ASSERT_EQ(4, t_i64.Value({0, 3}));
@@ -177,12 +191,16 @@ TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
ASSERT_EQ(11, t_i64.Value({2, 2}));

const int64_t f32_size = sizeof(float);
-  std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f, 0.0f, 2.1f, 6.1f, 10.1f, 0.0f,
-                                   3.1f, 7.1f, 11.1f, 0.0f, 4.1f, 8.1f, 12.1f, 0.0f};
-  std::vector<int64_t> strides_f32 = {f32_size, f32_size * 4};
+  std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f,
+                                   5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f,
+                                   9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
+  std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};
std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);

+  ASSERT_FALSE(t_f32.is_contiguous());
+  ASSERT_FALSE(t_f32.is_row_major());
+  ASSERT_FALSE(t_f32.is_column_major());
ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
9 changes: 9 additions & 0 deletions cpp/src/arrow/test-util.h
@@ -202,6 +202,15 @@ ARROW_EXPORT void PrintColumn(const Column& col, std::stringstream* ss);
ARROW_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
bool same_chunk_layout = true);

+template <typename C_TYPE>
+void AssertNumericDataEqual(const C_TYPE* raw_data,
+                            const std::vector<C_TYPE>& expected_values) {
+  for (auto expected : expected_values) {
+    ASSERT_EQ(expected, *raw_data);
+    ++raw_data;
+  }
+}
+
ARROW_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right);

// Check if the padding of the buffers of the array is zero.
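[Note: a usage sketch for the new helper, with hypothetical values; it assumes a gtest test body with test-util.h included. C_TYPE is deduced from the pointer argument, so the braced list converts to std::vector<int64_t>, matching the calls in sparse_tensor-test.cc above.]

    TEST(Example, AssertNumericDataEqualUsage) {
      std::vector<int64_t> data = {1, 2, 3, 4};
      // Reads expected_values.size() elements starting at raw_data
      // and compares them element-wise.
      AssertNumericDataEqual(data.data(), {1, 2, 3, 4});
    }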