Skip to content

Commit 1b922f6

Browse files
committed
Implementing review feedback.
1 parent 11b81bb commit 1b922f6

File tree

4 files changed

+47
-64
lines changed

4 files changed

+47
-64
lines changed

cpp/src/arrow/python/serialize.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -664,11 +664,12 @@ Status CountSparseTensors(
664664
case SparseTensorFormat::CSR:
665665
++num_csr;
666666
break;
667-
case SparseTensorFormat::CSC:
668-
// TODO(mrkn): support csc
669667
case SparseTensorFormat::CSF:
670668
++num_csf;
671669
break;
670+
case SparseTensorFormat::CSC:
671+
// TODO(mrkn): support csc
672+
break;
672673
}
673674
}
674675

cpp/src/arrow/sparse_tensor.cc

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ class SparseTensorConverter {
6060
// ----------------------------------------------------------------------
6161
// IncrementIndex for SparseCOOIndex and SparseCSFIndex
6262

63-
void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t> shape) {
63+
inline void IncrementIndex(std::vector<int64_t>& coord,
64+
const std::vector<int64_t>& shape) {
6465
const int64_t ndim = shape.size();
6566
++coord[ndim - 1];
6667
if (coord[ndim - 1] == shape[ndim - 1]) {
@@ -73,8 +74,8 @@ void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t> shap
7374
}
7475
}
7576

76-
void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t> shape,
77-
std::vector<int64_t> axis_order) {
77+
inline void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t>& shape,
78+
const std::vector<int64_t>& axis_order) {
7879
const int64_t ndim = shape.size();
7980
const int64_t last_axis = axis_order[ndim - 1];
8081
++coord[last_axis];
@@ -161,9 +162,7 @@ class SparseTensorConverter<TYPE, SparseCOOIndex>
161162
*indices++ = static_cast<c_index_value_type>(coord[i]);
162163
}
163164
}
164-
if (n > 1) {
165-
IncrementIndex(coord, shape);
166-
}
165+
IncrementIndex(coord, shape);
167166
}
168167
}
169168

@@ -496,11 +495,9 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
496495

497496
for (int64_t i = 0; i < ndim; ++i) {
498497
int64_t dimension = axis_order[i];
499-
bool change = coord[dimension] != previous_coord[dimension];
500-
501-
if (tree_split || change) {
502-
if (change) tree_split = true;
503498

499+
tree_split = tree_split || (coord[dimension] != previous_coord[dimension]);
500+
if (tree_split) {
504501
if (i < ndim - 1) {
505502
RETURN_NOT_OK(indptr_buffer_builders[i].Append(
506503
static_cast<c_index_value_type>(counts[i + 1])));
@@ -512,9 +509,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
512509
}
513510
previous_coord = coord;
514511
}
515-
if (n > 1) {
516-
IncrementIndex(coord, shape, axis_order);
517-
}
512+
IncrementIndex(coord, shape, axis_order);
518513
}
519514
}
520515

@@ -682,25 +677,26 @@ Status MakeSparseTensorFromTensor(const Tensor& tensor,
682677
namespace {
683678

684679
template <typename TYPE, typename IndexValueType>
685-
void ExpandSparseCSFTensorValues(int64_t dimension, int64_t offset, int64_t first_ptr,
686-
int64_t last_ptr, const SparseCSFIndex* sparse_index,
687-
const TYPE* raw_data, const std::vector<int64_t> strides,
688-
const std::vector<int64_t> axis_order, TYPE* out) {
680+
void ExpandSparseCSFTensorValues(int64_t dimension, int64_t dense_offset,
681+
int64_t first_ptr, int64_t last_ptr,
682+
const SparseCSFIndex& sparse_index, const TYPE* raw_data,
683+
const std::vector<int64_t>& strides,
684+
const std::vector<int64_t>& axis_order, TYPE* out) {
689685
int64_t ndim = axis_order.size();
690686

691687
for (int64_t i = first_ptr; i < last_ptr; ++i) {
692-
int64_t tmp_offset =
693-
offset + sparse_index->indices()[dimension]->Value<IndexValueType>({i}) *
694-
strides[axis_order[dimension]];
688+
int64_t tmp_dense_offset =
689+
dense_offset + sparse_index.indices()[dimension]->Value<IndexValueType>({i}) *
690+
strides[axis_order[dimension]];
695691

696692
if (dimension < ndim - 1) {
697693
ExpandSparseCSFTensorValues<TYPE, IndexValueType>(
698-
dimension + 1, tmp_offset,
699-
sparse_index->indptr()[dimension]->Value<IndexValueType>({i}),
700-
sparse_index->indptr()[dimension]->Value<IndexValueType>({i + 1}), sparse_index,
694+
dimension + 1, tmp_dense_offset,
695+
sparse_index.indptr()[dimension]->Value<IndexValueType>({i}),
696+
sparse_index.indptr()[dimension]->Value<IndexValueType>({i + 1}), sparse_index,
701697
raw_data, strides, axis_order, out);
702698
} else {
703-
out[tmp_offset] = raw_data[i];
699+
out[tmp_dense_offset] = raw_data[i];
704700
}
705701
}
706702
}
@@ -797,7 +793,7 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
797793
internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index());
798794

799795
ExpandSparseCSFTensorValues<value_type, IndexValueType>(
800-
0, 0, 0, sparse_index.indptr()[0]->size() - 1, &sparse_index, raw_data, strides,
796+
0, 0, 0, sparse_index.indptr()[0]->size() - 1, sparse_index, raw_data, strides,
801797
sparse_index.axis_order(), values);
802798
*out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
803799
sparse_tensor->shape(), empty_strides,
@@ -995,11 +991,11 @@ inline Status CheckSparseCSFIndexValidity(const std::shared_ptr<DataType>& indpt
995991
}
996992
if (num_indptrs + 1 != num_indices) {
997993
return Status::Invalid(
998-
"Length of indices must be equal to length of inptrs + 1 for SparseCSFIndex.");
994+
"Length of indices must be equal to length of indptrs + 1 for SparseCSFIndex.");
999995
}
1000996
if (axis_order_size != num_indices) {
1001997
return Status::Invalid(
1002-
"Length of indices must be equal number of dimensions for SparseCSFIndex.");
998+
"Length of indices must be equal to number of dimensions for SparseCSFIndex.");
1003999
}
10041000
return Status::OK();
10051001
}
@@ -1045,6 +1041,16 @@ SparseCSFIndex::SparseCSFIndex(std::vector<std::shared_ptr<Tensor>>& indptr,
10451041

10461042
std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); }
10471043

1044+
bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const {
1045+
for (int64_t i = 0; i < static_cast<int64_t>(indices().size()); ++i) {
1046+
if (!indices()[i]->Equals(*other.indices()[i])) return false;
1047+
}
1048+
for (int64_t i = 0; i < static_cast<int64_t>(indptr().size()); ++i) {
1049+
if (!indptr()[i]->Equals(*other.indptr()[i])) return false;
1050+
}
1051+
return axis_order() == other.axis_order();
1052+
}
1053+
10481054
// ----------------------------------------------------------------------
10491055
// SparseTensor
10501056

cpp/src/arrow/sparse_tensor.h

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -372,10 +372,10 @@ class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIn
372372
std::vector<std::shared_ptr<Tensor>>& indices,
373373
const std::vector<int64_t>& axis_order);
374374

375-
/// \brief Return a 1D tensor of indptr vector
375+
/// \brief Return a 1D vector of indptr tensors
376376
const std::vector<std::shared_ptr<Tensor>>& indptr() const { return indptr_; }
377377

378-
/// \brief Return a 1D tensor of indices vector
378+
/// \brief Return a 1D vector of indices tensors
379379
const std::vector<std::shared_ptr<Tensor>>& indices() const { return indices_; }
380380

381381
/// \brief Return a 1D vector specifying the order of axes
@@ -385,13 +385,7 @@ class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIn
385385
std::string ToString() const override;
386386

387387
/// \brief Return whether the CSF indices are equal
388-
bool Equals(const SparseCSFIndex& other) const {
389-
for (int64_t i = 0; i < static_cast<int64_t>(indices().size()); ++i)
390-
if (!indices()[i]->Equals(*other.indices()[i])) return false;
391-
for (int64_t i = 0; i < static_cast<int64_t>(indptr().size()); ++i)
392-
if (!indptr()[i]->Equals(*other.indptr()[i])) return false;
393-
return axis_order() == other.axis_order();
394-
}
388+
bool Equals(const SparseCSFIndex& other) const;
395389

396390
protected:
397391
std::vector<std::shared_ptr<Tensor>> indptr_;

cpp/src/arrow/sparse_tensor_test.cc

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@
2626

2727
#include <gtest/gtest.h>
2828

29-
#include <arrow/util/sort.h>
3029
#include "arrow/sparse_tensor.h"
3130
#include "arrow/testing/gtest_util.h"
3231
#include "arrow/testing/util.h"
3332
#include "arrow/type.h"
33+
#include "arrow/util/sort.h"
3434

3535
namespace arrow {
3636

@@ -949,7 +949,7 @@ class TestSparseCSFTensorForIndexValueType
949949
: public TestSparseCSFTensorBase<IndexValueType> {
950950
protected:
951951
std::shared_ptr<SparseCSFIndex> MakeSparseCSFIndex(
952-
const std::vector<int64_t> axis_order,
952+
const std::vector<int64_t>& axis_order,
953953
std::vector<std::vector<typename IndexValueType::c_type>>& indptr_values,
954954
std::vector<std::vector<typename IndexValueType::c_type>>& indices_values) const {
955955
int64_t ndim = axis_order.size();
@@ -972,7 +972,8 @@ class TestSparseCSFTensorForIndexValueType
972972
template <typename CValueType>
973973
std::shared_ptr<SparseCSFTensor> MakeSparseTensor(
974974
const std::shared_ptr<SparseCSFIndex>& si, std::vector<CValueType>& sparse_values,
975-
const std::vector<int64_t> shape, const std::vector<std::string> dim_names) const {
975+
const std::vector<int64_t>& shape,
976+
const std::vector<std::string>& dim_names) const {
976977
auto data_buffer = Buffer::Wrap(sparse_values);
977978
return std::make_shared<SparseCSFTensor>(
978979
si, CTypeTraits<CValueType>::type_singleton(), data_buffer, shape, dim_names);
@@ -1001,29 +1002,10 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestCreateSparseTensor) {
10011002
}
10021003

10031004
TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestTensorToSparseTensor) {
1004-
using IndexValueType = TypeParam;
1005-
std::vector<int64_t> shape = {2, 3, 4, 5};
1006-
int16_t dense_values[2][3][4][5] = {}; // zero-initialized
1007-
dense_values[0][0][0][1] = 1;
1008-
dense_values[0][0][0][2] = 2;
1009-
dense_values[0][1][0][0] = 3;
1010-
dense_values[0][1][0][2] = 4;
1011-
dense_values[0][1][1][0] = 5;
1012-
dense_values[1][1][1][0] = 6;
1013-
dense_values[1][1][1][1] = 7;
1014-
dense_values[1][1][1][2] = 8;
1015-
auto dense_buffer = Buffer::Wrap(dense_values, sizeof(dense_values));
1016-
Tensor dense_tensor(int16(), dense_buffer, shape, {}, this->dim_names_);
1017-
1018-
std::shared_ptr<SparseCSFTensor> sparse_tensor;
1019-
ASSERT_OK_AND_ASSIGN(
1020-
sparse_tensor,
1021-
SparseCSFTensor::Make(dense_tensor, TypeTraits<IndexValueType>::type_singleton()));
1022-
1023-
ASSERT_EQ(8, sparse_tensor->non_zero_length());
1024-
ASSERT_TRUE(sparse_tensor->is_mutable());
1025-
ASSERT_TRUE(sparse_tensor->Equals(*this->sparse_tensor_from_dense_));
1026-
ASSERT_EQ(sparse_tensor->dim_names(), dense_tensor.dim_names());
1005+
std::vector<std::string> dim_names = {"a", "b", "c", "d"};
1006+
ASSERT_EQ(8, this->sparse_tensor_from_dense_->non_zero_length());
1007+
ASSERT_TRUE(this->sparse_tensor_from_dense_->is_mutable());
1008+
ASSERT_EQ(dim_names, this->sparse_tensor_from_dense_->dim_names());
10271009
}
10281010

10291011
TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorToTensor) {

0 commit comments

Comments
 (0)