1717
1818#include " arrow/sparse_tensor.h"
1919
20- #include < arrow/util/sort.h>
2120#include < algorithm>
2221#include < functional>
2322#include < limits>
2827#include " arrow/compare.h"
2928#include " arrow/util/checked_cast.h"
3029#include " arrow/util/logging.h"
30+ #include " arrow/util/sort.h"
3131#include " arrow/visitor_inline.h"
3232
3333namespace arrow {
@@ -130,7 +130,6 @@ class SparseTensorConverter<TYPE, SparseCOOIndex>
130130 *indices++ = static_cast <c_index_value_type>(coord[i]);
131131 }
132132 }
133-
134133 // increment index
135134 ++coord[ndim - 1 ];
136135 if (n > 1 && coord[ndim - 1 ] == shape[ndim - 1 ]) {
@@ -443,6 +442,8 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
443442 RETURN_NOT_OK (CheckMaximumValue (std::numeric_limits<c_index_value_type>::max ()));
444443
445444 const int64_t ndim = tensor_.ndim ();
445+ // Axis order as ascending order of dimension size is a good heuristic but is not
446+ // necessarily optimal.
446447 std::vector<int64_t > axis_order = internal::ArgSort (tensor_.shape ());
447448 int64_t nonzero_count = -1 ;
448449 RETURN_NOT_OK (tensor_.CountNonZero (&nonzero_count));
@@ -465,7 +466,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
465466 for (int64_t n = tensor_.size (); n > 0 ; n--) {
466467 const value_type x = tensor_.Value (coord);
467468
468- if (tensor_. Value (coord) != 0 ) {
469+ if (x != 0 ) {
469470 bool tree_split = false ;
470471 *values++ = x;
471472
@@ -476,24 +477,25 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
476477 if (tree_split || change) {
477478 if (change) tree_split = true ;
478479
479- if (i < ndim - 1 )
480+ if (i < ndim - 1 ) {
480481 RETURN_NOT_OK (indptr_buffer_builders[i].Append (
481- static_cast <c_index_value_type>(counts[dimension + 1 ])));
482+ static_cast <c_index_value_type>(counts[i + 1 ])));
483+ }
482484 RETURN_NOT_OK (indices_buffer_builders[i].Append (
483485 static_cast <c_index_value_type>(coord[dimension])));
484- ++counts[dimension ];
486+ ++counts[i ];
485487 }
486488 }
487489 previous_coord = coord;
488490 }
489-
490491 // increment index
491- ++coord[ndim - 1 ];
492- if (n > 1 && coord[ndim - 1 ] == shape[ndim - 1 ]) {
492+ int64_t last_axis = axis_order[ndim - 1 ];
493+ ++coord[last_axis];
494+ if (n > 1 && coord[last_axis] == shape[last_axis]) {
493495 int64_t d = ndim - 1 ;
494- while (d > 0 && coord[d] == shape[d ]) {
495- coord[d ] = 0 ;
496- ++coord[d - 1 ];
496+ while (d > 0 && coord[axis_order[d]] == shape[axis_order[d] ]) {
497+ coord[axis_order[d] ] = 0 ;
498+ ++coord[axis_order[ d - 1 ] ];
497499 --d;
498500 }
499501 }
@@ -513,12 +515,13 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
513515 std::vector<int64_t > indptr_shapes (counts.begin (), counts.end () - 1 );
514516 std::vector<int64_t > indices_shapes = counts;
515517
516- for (int64_t column = 0 ; column < ndim; ++column)
518+ for (int64_t column = 0 ; column < ndim; ++column) {
517519 RETURN_NOT_OK (
518520 indices_buffer_builders[column].Finish (&indices_buffers[column], true ));
519-
520- for (int64_t column = 0 ; column < ndim - 1 ; ++column)
521+ }
522+ for (int64_t column = 0 ; column < ndim - 1 ; ++column) {
521523 RETURN_NOT_OK (indptr_buffer_builders[column].Finish (&indptr_buffers[column], true ));
524+ }
522525
523526 ARROW_ASSIGN_OR_RAISE (
524527 sparse_index, SparseCSFIndex::Make (index_value_type_, indices_shapes, axis_order,
@@ -665,8 +668,7 @@ namespace {
665668template <typename TYPE, typename IndexValueType>
666669void ExpandSparseCSFTensorValues (int64_t dimension, int64_t offset, int64_t first_ptr,
667670 int64_t last_ptr, const SparseCSFIndex* sparse_index,
668- const int64_t * raw_data,
669- const std::vector<int64_t > strides,
671+ const TYPE* raw_data, const std::vector<int64_t > strides,
670672 const std::vector<int64_t > axis_order, TYPE* out) {
671673 int64_t ndim = axis_order.size ();
672674
@@ -675,14 +677,15 @@ void ExpandSparseCSFTensorValues(int64_t dimension, int64_t offset, int64_t firs
675677 offset + sparse_index->indices ()[dimension]->Value <IndexValueType>({i}) *
676678 strides[axis_order[dimension]];
677679
678- if (dimension < ndim - 1 )
680+ if (dimension < ndim - 1 ) {
679681 ExpandSparseCSFTensorValues<TYPE, IndexValueType>(
680682 dimension + 1 , tmp_offset,
681683 sparse_index->indptr ()[dimension]->Value <IndexValueType>({i}),
682684 sparse_index->indptr ()[dimension]->Value <IndexValueType>({i + 1 }), sparse_index,
683685 raw_data, strides, axis_order, out);
684- else
685- out[tmp_offset] = static_cast <TYPE>(raw_data[i]);
686+ } else {
687+ out[tmp_offset] = raw_data[i];
688+ }
686689 }
687690}
688691
@@ -703,8 +706,10 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
703706 std::fill_n (values, sparse_tensor->size (), static_cast <value_type>(0 ));
704707
705708 std::vector<int64_t > strides (sparse_tensor->ndim (), 1 );
706- for (int i = sparse_tensor->ndim () - 1 ; i > 0 ; --i)
709+ for (int i = sparse_tensor->ndim () - 1 ; i > 0 ; --i) {
707710 strides[i - 1 ] *= strides[i] * sparse_tensor->shape ()[i];
711+ }
712+ std::vector<int64_t > empty_strides;
708713
709714 const auto raw_data = reinterpret_cast <const value_type*>(sparse_tensor->raw_data ());
710715
@@ -724,7 +729,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
724729 values[offset] = raw_data[i];
725730 }
726731 *out = std::make_shared<Tensor>(sparse_tensor->type (), values_buffer,
727- sparse_tensor->shape ());
732+ sparse_tensor->shape (), empty_strides,
733+ sparse_tensor->dim_names ());
728734 return Status::OK ();
729735 }
730736
@@ -744,7 +750,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
744750 }
745751 }
746752 *out = std::make_shared<Tensor>(sparse_tensor->type (), values_buffer,
747- sparse_tensor->shape ());
753+ sparse_tensor->shape (), empty_strides,
754+ sparse_tensor->dim_names ());
748755 return Status::OK ();
749756 }
750757
@@ -764,7 +771,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
764771 }
765772 }
766773 *out = std::make_shared<Tensor>(sparse_tensor->type (), values_buffer,
767- sparse_tensor->shape ());
774+ sparse_tensor->shape (), empty_strides,
775+ sparse_tensor->dim_names ());
768776 return Status::OK ();
769777 }
770778
@@ -773,11 +781,11 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
773781 internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index ());
774782
775783 ExpandSparseCSFTensorValues<value_type, IndexValueType>(
776- 0 , 0 , 0 , sparse_index.indptr ()[0 ]->size () - 1 , &sparse_index,
777- reinterpret_cast <const int64_t *>(sparse_tensor->raw_data ()), strides,
784+ 0 , 0 , 0 , sparse_index.indptr ()[0 ]->size () - 1 , &sparse_index, raw_data, strides,
778785 sparse_index.axis_order (), values);
779786 *out = std::make_shared<Tensor>(sparse_tensor->type (), values_buffer,
780- sparse_tensor->shape ());
787+ sparse_tensor->shape (), empty_strides,
788+ sparse_tensor->dim_names ());
781789 return Status::OK ();
782790 }
783791 }
0 commit comments