Skip to content

Commit 6f4f4a8

Browse files
committed
Implementing feedback review.
1 parent 28d38cb commit 6f4f4a8

File tree

3 files changed

+196
-334
lines changed

3 files changed

+196
-334
lines changed

cpp/src/arrow/sparse_tensor.cc

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
#include "arrow/sparse_tensor.h"
1919

20-
#include <arrow/util/sort.h>
2120
#include <algorithm>
2221
#include <functional>
2322
#include <limits>
@@ -28,6 +27,7 @@
2827
#include "arrow/compare.h"
2928
#include "arrow/util/checked_cast.h"
3029
#include "arrow/util/logging.h"
30+
#include "arrow/util/sort.h"
3131
#include "arrow/visitor_inline.h"
3232

3333
namespace arrow {
@@ -130,7 +130,6 @@ class SparseTensorConverter<TYPE, SparseCOOIndex>
130130
*indices++ = static_cast<c_index_value_type>(coord[i]);
131131
}
132132
}
133-
134133
// increment index
135134
++coord[ndim - 1];
136135
if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) {
@@ -443,6 +442,8 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
443442
RETURN_NOT_OK(CheckMaximumValue(std::numeric_limits<c_index_value_type>::max()));
444443

445444
const int64_t ndim = tensor_.ndim();
445+
// Axis order as ascending order of dimension size is a good heuristic but is not
446+
// necessarily optimal.
446447
std::vector<int64_t> axis_order = internal::ArgSort(tensor_.shape());
447448
int64_t nonzero_count = -1;
448449
RETURN_NOT_OK(tensor_.CountNonZero(&nonzero_count));
@@ -465,7 +466,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
465466
for (int64_t n = tensor_.size(); n > 0; n--) {
466467
const value_type x = tensor_.Value(coord);
467468

468-
if (tensor_.Value(coord) != 0) {
469+
if (x != 0) {
469470
bool tree_split = false;
470471
*values++ = x;
471472

@@ -476,24 +477,25 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
476477
if (tree_split || change) {
477478
if (change) tree_split = true;
478479

479-
if (i < ndim - 1)
480+
if (i < ndim - 1) {
480481
RETURN_NOT_OK(indptr_buffer_builders[i].Append(
481-
static_cast<c_index_value_type>(counts[dimension + 1])));
482+
static_cast<c_index_value_type>(counts[i + 1])));
483+
}
482484
RETURN_NOT_OK(indices_buffer_builders[i].Append(
483485
static_cast<c_index_value_type>(coord[dimension])));
484-
++counts[dimension];
486+
++counts[i];
485487
}
486488
}
487489
previous_coord = coord;
488490
}
489-
490491
// increment index
491-
++coord[ndim - 1];
492-
if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) {
492+
int64_t last_axis = axis_order[ndim - 1];
493+
++coord[last_axis];
494+
if (n > 1 && coord[last_axis] == shape[last_axis]) {
493495
int64_t d = ndim - 1;
494-
while (d > 0 && coord[d] == shape[d]) {
495-
coord[d] = 0;
496-
++coord[d - 1];
496+
while (d > 0 && coord[axis_order[d]] == shape[axis_order[d]]) {
497+
coord[axis_order[d]] = 0;
498+
++coord[axis_order[d - 1]];
497499
--d;
498500
}
499501
}
@@ -513,12 +515,13 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
513515
std::vector<int64_t> indptr_shapes(counts.begin(), counts.end() - 1);
514516
std::vector<int64_t> indices_shapes = counts;
515517

516-
for (int64_t column = 0; column < ndim; ++column)
518+
for (int64_t column = 0; column < ndim; ++column) {
517519
RETURN_NOT_OK(
518520
indices_buffer_builders[column].Finish(&indices_buffers[column], true));
519-
520-
for (int64_t column = 0; column < ndim - 1; ++column)
521+
}
522+
for (int64_t column = 0; column < ndim - 1; ++column) {
521523
RETURN_NOT_OK(indptr_buffer_builders[column].Finish(&indptr_buffers[column], true));
524+
}
522525

523526
ARROW_ASSIGN_OR_RAISE(
524527
sparse_index, SparseCSFIndex::Make(index_value_type_, indices_shapes, axis_order,
@@ -665,8 +668,7 @@ namespace {
665668
template <typename TYPE, typename IndexValueType>
666669
void ExpandSparseCSFTensorValues(int64_t dimension, int64_t offset, int64_t first_ptr,
667670
int64_t last_ptr, const SparseCSFIndex* sparse_index,
668-
const int64_t* raw_data,
669-
const std::vector<int64_t> strides,
671+
const TYPE* raw_data, const std::vector<int64_t> strides,
670672
const std::vector<int64_t> axis_order, TYPE* out) {
671673
int64_t ndim = axis_order.size();
672674

@@ -675,14 +677,15 @@ void ExpandSparseCSFTensorValues(int64_t dimension, int64_t offset, int64_t firs
675677
offset + sparse_index->indices()[dimension]->Value<IndexValueType>({i}) *
676678
strides[axis_order[dimension]];
677679

678-
if (dimension < ndim - 1)
680+
if (dimension < ndim - 1) {
679681
ExpandSparseCSFTensorValues<TYPE, IndexValueType>(
680682
dimension + 1, tmp_offset,
681683
sparse_index->indptr()[dimension]->Value<IndexValueType>({i}),
682684
sparse_index->indptr()[dimension]->Value<IndexValueType>({i + 1}), sparse_index,
683685
raw_data, strides, axis_order, out);
684-
else
685-
out[tmp_offset] = static_cast<TYPE>(raw_data[i]);
686+
} else {
687+
out[tmp_offset] = raw_data[i];
688+
}
686689
}
687690
}
688691

@@ -703,8 +706,10 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
703706
std::fill_n(values, sparse_tensor->size(), static_cast<value_type>(0));
704707

705708
std::vector<int64_t> strides(sparse_tensor->ndim(), 1);
706-
for (int i = sparse_tensor->ndim() - 1; i > 0; --i)
709+
for (int i = sparse_tensor->ndim() - 1; i > 0; --i) {
707710
strides[i - 1] *= strides[i] * sparse_tensor->shape()[i];
711+
}
712+
std::vector<int64_t> empty_strides;
708713

709714
const auto raw_data = reinterpret_cast<const value_type*>(sparse_tensor->raw_data());
710715

@@ -724,7 +729,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
724729
values[offset] = raw_data[i];
725730
}
726731
*out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
727-
sparse_tensor->shape());
732+
sparse_tensor->shape(), empty_strides,
733+
sparse_tensor->dim_names());
728734
return Status::OK();
729735
}
730736

@@ -744,7 +750,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
744750
}
745751
}
746752
*out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
747-
sparse_tensor->shape());
753+
sparse_tensor->shape(), empty_strides,
754+
sparse_tensor->dim_names());
748755
return Status::OK();
749756
}
750757

@@ -764,7 +771,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
764771
}
765772
}
766773
*out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
767-
sparse_tensor->shape());
774+
sparse_tensor->shape(), empty_strides,
775+
sparse_tensor->dim_names());
768776
return Status::OK();
769777
}
770778

@@ -773,11 +781,11 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
773781
internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index());
774782

775783
ExpandSparseCSFTensorValues<value_type, IndexValueType>(
776-
0, 0, 0, sparse_index.indptr()[0]->size() - 1, &sparse_index,
777-
reinterpret_cast<const int64_t*>(sparse_tensor->raw_data()), strides,
784+
0, 0, 0, sparse_index.indptr()[0]->size() - 1, &sparse_index, raw_data, strides,
778785
sparse_index.axis_order(), values);
779786
*out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
780-
sparse_tensor->shape());
787+
sparse_tensor->shape(), empty_strides,
788+
sparse_tensor->dim_names());
781789
return Status::OK();
782790
}
783791
}

0 commit comments

Comments
 (0)