@@ -419,6 +419,154 @@ class SparseTensorConverter<TYPE, SparseCSCIndex>
419419 inline Status CheckMaximumValue (const uint64_t ) const { return Status::OK (); }
420420};
421421
422+ // ----------------------------------------------------------------------
423+ // SparseTensorConverter for SparseCSFIndex
424+
425+ template <typename TYPE>
426+ class SparseTensorConverter <TYPE, SparseCSFIndex>
427+ : private SparseTensorConverterBase<TYPE> {
428+ public:
429+ using BaseClass = SparseTensorConverterBase<TYPE>;
430+ using typename BaseClass::NumericTensorType;
431+ using typename BaseClass::value_type;
432+
433+ SparseTensorConverter (const NumericTensorType& tensor,
434+ const std::shared_ptr<DataType>& index_value_type,
435+ MemoryPool* pool)
436+ : BaseClass(tensor, index_value_type, pool) {}
437+
438+ template <typename IndexValueType>
439+ Status Convert () {
440+ using c_index_value_type = typename IndexValueType::c_type;
441+ const int64_t indices_elsize = sizeof (c_index_value_type);
442+
443+ std::shared_ptr<SparseCOOTensor> sparse_coo_tensor;
444+ RETURN_NOT_OK (SparseCOOTensor::Make (tensor_, &sparse_coo_tensor));
445+ std::shared_ptr<Tensor> coords =
446+ arrow::internal::checked_pointer_cast<SparseCOOIndex>(
447+ sparse_coo_tensor->sparse_index ())
448+ ->indices ();
449+
450+ // Convert SparseCOOTensor to long CSF buffers
451+ const int64_t ndim = tensor_.ndim ();
452+ const int64_t nonzero_count = sparse_coo_tensor->non_zero_length ();
453+
454+ std::vector<int64_t > counts (ndim);
455+ std::fill_n (counts.begin (), ndim, static_cast <int64_t >(0 ));
456+
457+ std::vector<int64_t > axis_order (ndim);
458+ for (int64_t i = 0 ; i < ndim; ++i) axis_order[i] = i;
459+
460+ std::shared_ptr<Buffer> indices_buffer;
461+ std::shared_ptr<Buffer> indptr_buffer;
462+ RETURN_NOT_OK (
463+ AllocateBuffer (pool_, indices_elsize * ndim * nonzero_count, &indices_buffer));
464+ RETURN_NOT_OK (AllocateBuffer (pool_, indices_elsize * (ndim - 1 ) * (nonzero_count + 1 ),
465+ &indptr_buffer));
466+ int64_t * indices = reinterpret_cast <int64_t *>(indices_buffer->mutable_data ());
467+ int64_t * indptr = reinterpret_cast <int64_t *>(indptr_buffer->mutable_data ());
468+
469+ for (int64_t row = 0 ; row < nonzero_count; ++row) {
470+ bool tree_split = false ;
471+ for (int64_t column = 0 ; column < ndim; ++column) {
472+ bool change = coords->Value <IndexValueType>({row, column}) !=
473+ coords->Value <IndexValueType>({row - 1 , column});
474+
475+ if (tree_split || change || row == 0 ) {
476+ if (row > 1 ) tree_split = true ;
477+
478+ indices[column * nonzero_count + counts[column]] =
479+ coords->Value <IndexValueType>({row, column});
480+ indptr[column * (nonzero_count + 1 ) + counts[column]] = counts[column + 1 ];
481+ ++counts[column];
482+ }
483+ }
484+ }
485+
486+ for (int64_t column = 0 ; column < ndim; ++column) {
487+ indptr[column * (nonzero_count + 1 ) + counts[column]] = counts[column + 1 ];
488+ }
489+
490+ int64_t total_size = counts[0 ];
491+ for (int64_t column = 1 ; column < ndim; ++column) {
492+ for (int64_t i = 0 ; i < counts[column] + 1 ; ++i) {
493+ if (column < ndim - 1 )
494+ indptr[total_size + column + i] = indptr[column * (nonzero_count + 1 ) + i];
495+ if (i < counts[column])
496+ indices[total_size + i] = indices[column * nonzero_count + i];
497+ }
498+ total_size += counts[column];
499+ }
500+
501+ // Copy CSF index data into smaller buffers
502+ std::shared_ptr<Buffer> out_indices_buffer;
503+ std::shared_ptr<Buffer> out_indptr_buffer;
504+ RETURN_NOT_OK (
505+ AllocateBuffer (pool_, indices_elsize * total_size, &out_indices_buffer));
506+ RETURN_NOT_OK (AllocateBuffer (pool_,
507+ indices_elsize * total_size - nonzero_count + ndim - 1 ,
508+ &out_indptr_buffer));
509+ int64_t * out_indices = reinterpret_cast <int64_t *>(out_indices_buffer->mutable_data ());
510+ int64_t * out_indptr = reinterpret_cast <int64_t *>(out_indptr_buffer->mutable_data ());
511+
512+ for (int64_t i = 0 ; i < total_size; ++i) out_indices[i] = indices[i];
513+
514+ for (int64_t i = 0 ; i < total_size - nonzero_count + ndim - 1 ; ++i)
515+ out_indptr[i] = indptr[i];
516+
517+ // Construct SparseCSFTensor
518+ std::vector<int64_t > out_indptr_shape ({total_size - nonzero_count + ndim - 1 });
519+ std::shared_ptr<Tensor> out_indptr_tensor =
520+ std::make_shared<Tensor>(int64 (), out_indptr_buffer, out_indptr_shape);
521+
522+ std::vector<int64_t > out_indices_shape ({total_size});
523+ std::shared_ptr<Tensor> out_indices_tensor =
524+ std::make_shared<Tensor>(int64 (), out_indices_buffer, out_indices_shape);
525+
526+ std::vector<int64_t > indptr_offsets (ndim - 1 );
527+ std::vector<int64_t > indices_offsets (ndim);
528+ std::fill_n (indptr_offsets.begin (), ndim - 1 , static_cast <int64_t >(0 ));
529+ std::fill_n (indices_offsets.begin (), ndim, static_cast <int64_t >(0 ));
530+
531+ for (int64_t i = 0 ; i < ndim - 2 ; ++i)
532+ indptr_offsets[i + 1 ] = indptr_offsets[i] + counts[i] + 1 ;
533+
534+ for (int64_t i = 0 ; i < ndim; ++i)
535+ indices_offsets[i + 1 ] = indices_offsets[i] + counts[i];
536+
537+ sparse_index =
538+ std::make_shared<SparseCSFIndex>(out_indptr_tensor, out_indices_tensor,
539+ indptr_offsets, indices_offsets, axis_order);
540+ data = sparse_coo_tensor->data ();
541+
542+ return Status::OK ();
543+ }
544+
545+ #define CALL_TYPE_SPECIFIC_CONVERT (TYPE_CLASS ) \
546+ case TYPE_CLASS##Type::type_id: \
547+ return Convert<TYPE_CLASS##Type>();
548+
549+ Status Convert () {
550+ switch (index_value_type_->id ()) {
551+ ARROW_GENERATE_FOR_ALL_INTEGER_TYPES (CALL_TYPE_SPECIFIC_CONVERT);
552+ // LCOV_EXCL_START: The following invalid causes program failure.
553+ default :
554+ return Status::TypeError (" Unsupported SparseTensor index value type" );
555+ // LCOV_EXCL_STOP
556+ }
557+ }
558+
559+ #undef CALL_TYPE_SPECIFIC_CONVERT
560+
561+ std::shared_ptr<SparseCSFIndex> sparse_index;
562+ std::shared_ptr<Buffer> data;
563+
564+ private:
565+ using BaseClass::index_value_type_;
566+ using BaseClass::pool_;
567+ using BaseClass::tensor_;
568+ };
569+
422570// ----------------------------------------------------------------------
423571// Instantiate templates
424572
@@ -502,7 +650,8 @@ Status MakeSparseTensorFromTensor(const Tensor& tensor,
502650 return MakeSparseTensorFromTensor<SparseCSCIndex>(tensor, index_value_type, pool,
503651 out_sparse_index, out_data);
504652 case SparseTensorFormat::CSF:
505- return Status::Invalid (" Unsupported Tensor value type" );
653+ return MakeSparseTensorFromTensor<SparseCSFIndex>(tensor, index_value_type, pool,
654+ out_sparse_index, out_data);
506655
507656 // LCOV_EXCL_START: ignore program failure
508657 default :
@@ -812,7 +961,7 @@ SparseCSFIndex::SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
812961 const std::vector<int64_t >& indptr_offsets,
813962 const std::vector<int64_t >& indices_offsets,
814963 const std::vector<int64_t >& axis_order)
815- : SparseIndexBase(indices->shape ()[0] - indices_offsets.back()),
964+ : SparseIndexBase(indices->size () - indices_offsets.back()),
816965 indptr_(indptr),
817966 indices_(indices),
818967 indptr_offsets_(indptr_offsets),
0 commit comments