Skip to content
This repository was archived by the owner on Oct 21, 2024. It is now read-only.

Commit 7d17995

Browse files
committed
Adding Tensor to SparseCSFTensor conversion.
1 parent 05a47a5 commit 7d17995

File tree

2 files changed

+179
-2
lines changed

2 files changed

+179
-2
lines changed

cpp/src/arrow/sparse_tensor.cc

Lines changed: 151 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,154 @@ class SparseTensorConverter<TYPE, SparseCSCIndex>
419419
inline Status CheckMaximumValue(const uint64_t) const { return Status::OK(); }
420420
};
421421

422+
// ----------------------------------------------------------------------
423+
// SparseTensorConverter for SparseCSFIndex
424+
425+
template <typename TYPE>
426+
class SparseTensorConverter<TYPE, SparseCSFIndex>
427+
: private SparseTensorConverterBase<TYPE> {
428+
public:
429+
using BaseClass = SparseTensorConverterBase<TYPE>;
430+
using typename BaseClass::NumericTensorType;
431+
using typename BaseClass::value_type;
432+
433+
SparseTensorConverter(const NumericTensorType& tensor,
434+
const std::shared_ptr<DataType>& index_value_type,
435+
MemoryPool* pool)
436+
: BaseClass(tensor, index_value_type, pool) {}
437+
438+
template <typename IndexValueType>
439+
Status Convert() {
440+
using c_index_value_type = typename IndexValueType::c_type;
441+
const int64_t indices_elsize = sizeof(c_index_value_type);
442+
443+
std::shared_ptr<SparseCOOTensor> sparse_coo_tensor;
444+
RETURN_NOT_OK(SparseCOOTensor::Make(tensor_, &sparse_coo_tensor));
445+
std::shared_ptr<Tensor> coords =
446+
arrow::internal::checked_pointer_cast<SparseCOOIndex>(
447+
sparse_coo_tensor->sparse_index())
448+
->indices();
449+
450+
// Convert SparseCOOTensor to long CSF buffers
451+
const int64_t ndim = tensor_.ndim();
452+
const int64_t nonzero_count = sparse_coo_tensor->non_zero_length();
453+
454+
std::vector<int64_t> counts(ndim);
455+
std::fill_n(counts.begin(), ndim, static_cast<int64_t>(0));
456+
457+
std::vector<int64_t> axis_order(ndim);
458+
for (int64_t i = 0; i < ndim; ++i) axis_order[i] = i;
459+
460+
std::shared_ptr<Buffer> indices_buffer;
461+
std::shared_ptr<Buffer> indptr_buffer;
462+
RETURN_NOT_OK(
463+
AllocateBuffer(pool_, indices_elsize * ndim * nonzero_count, &indices_buffer));
464+
RETURN_NOT_OK(AllocateBuffer(pool_, indices_elsize * (ndim - 1) * (nonzero_count + 1),
465+
&indptr_buffer));
466+
int64_t* indices = reinterpret_cast<int64_t*>(indices_buffer->mutable_data());
467+
int64_t* indptr = reinterpret_cast<int64_t*>(indptr_buffer->mutable_data());
468+
469+
for (int64_t row = 0; row < nonzero_count; ++row) {
470+
bool tree_split = false;
471+
for (int64_t column = 0; column < ndim; ++column) {
472+
bool change = coords->Value<IndexValueType>({row, column}) !=
473+
coords->Value<IndexValueType>({row - 1, column});
474+
475+
if (tree_split || change || row == 0) {
476+
if (row > 1) tree_split = true;
477+
478+
indices[column * nonzero_count + counts[column]] =
479+
coords->Value<IndexValueType>({row, column});
480+
indptr[column * (nonzero_count + 1) + counts[column]] = counts[column + 1];
481+
++counts[column];
482+
}
483+
}
484+
}
485+
486+
for (int64_t column = 0; column < ndim; ++column) {
487+
indptr[column * (nonzero_count + 1) + counts[column]] = counts[column + 1];
488+
}
489+
490+
int64_t total_size = counts[0];
491+
for (int64_t column = 1; column < ndim; ++column) {
492+
for (int64_t i = 0; i < counts[column] + 1; ++i) {
493+
if (column < ndim - 1)
494+
indptr[total_size + column + i] = indptr[column * (nonzero_count + 1) + i];
495+
if (i < counts[column])
496+
indices[total_size + i] = indices[column * nonzero_count + i];
497+
}
498+
total_size += counts[column];
499+
}
500+
501+
// Copy CSF index data into smaller buffers
502+
std::shared_ptr<Buffer> out_indices_buffer;
503+
std::shared_ptr<Buffer> out_indptr_buffer;
504+
RETURN_NOT_OK(
505+
AllocateBuffer(pool_, indices_elsize * total_size, &out_indices_buffer));
506+
RETURN_NOT_OK(AllocateBuffer(pool_,
507+
indices_elsize * total_size - nonzero_count + ndim - 1,
508+
&out_indptr_buffer));
509+
int64_t* out_indices = reinterpret_cast<int64_t*>(out_indices_buffer->mutable_data());
510+
int64_t* out_indptr = reinterpret_cast<int64_t*>(out_indptr_buffer->mutable_data());
511+
512+
for (int64_t i = 0; i < total_size; ++i) out_indices[i] = indices[i];
513+
514+
for (int64_t i = 0; i < total_size - nonzero_count + ndim - 1; ++i)
515+
out_indptr[i] = indptr[i];
516+
517+
// Construct SparseCSFTensor
518+
std::vector<int64_t> out_indptr_shape({total_size - nonzero_count + ndim - 1});
519+
std::shared_ptr<Tensor> out_indptr_tensor =
520+
std::make_shared<Tensor>(int64(), out_indptr_buffer, out_indptr_shape);
521+
522+
std::vector<int64_t> out_indices_shape({total_size});
523+
std::shared_ptr<Tensor> out_indices_tensor =
524+
std::make_shared<Tensor>(int64(), out_indices_buffer, out_indices_shape);
525+
526+
std::vector<int64_t> indptr_offsets(ndim - 1);
527+
std::vector<int64_t> indices_offsets(ndim);
528+
std::fill_n(indptr_offsets.begin(), ndim - 1, static_cast<int64_t>(0));
529+
std::fill_n(indices_offsets.begin(), ndim, static_cast<int64_t>(0));
530+
531+
for (int64_t i = 0; i < ndim - 2; ++i)
532+
indptr_offsets[i + 1] = indptr_offsets[i] + counts[i] + 1;
533+
534+
for (int64_t i = 0; i < ndim; ++i)
535+
indices_offsets[i + 1] = indices_offsets[i] + counts[i];
536+
537+
sparse_index =
538+
std::make_shared<SparseCSFIndex>(out_indptr_tensor, out_indices_tensor,
539+
indptr_offsets, indices_offsets, axis_order);
540+
data = sparse_coo_tensor->data();
541+
542+
return Status::OK();
543+
}
544+
545+
#define CALL_TYPE_SPECIFIC_CONVERT(TYPE_CLASS) \
546+
case TYPE_CLASS##Type::type_id: \
547+
return Convert<TYPE_CLASS##Type>();
548+
549+
Status Convert() {
550+
switch (index_value_type_->id()) {
551+
ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(CALL_TYPE_SPECIFIC_CONVERT);
552+
// LCOV_EXCL_START: The following invalid causes program failure.
553+
default:
554+
return Status::TypeError("Unsupported SparseTensor index value type");
555+
// LCOV_EXCL_STOP
556+
}
557+
}
558+
559+
#undef CALL_TYPE_SPECIFIC_CONVERT
560+
561+
std::shared_ptr<SparseCSFIndex> sparse_index;
562+
std::shared_ptr<Buffer> data;
563+
564+
private:
565+
using BaseClass::index_value_type_;
566+
using BaseClass::pool_;
567+
using BaseClass::tensor_;
568+
};
569+
422570
// ----------------------------------------------------------------------
423571
// Instantiate templates
424572

@@ -502,7 +650,8 @@ Status MakeSparseTensorFromTensor(const Tensor& tensor,
502650
return MakeSparseTensorFromTensor<SparseCSCIndex>(tensor, index_value_type, pool,
503651
out_sparse_index, out_data);
504652
case SparseTensorFormat::CSF:
505-
return Status::Invalid("Unsupported Tensor value type");
653+
return MakeSparseTensorFromTensor<SparseCSFIndex>(tensor, index_value_type, pool,
654+
out_sparse_index, out_data);
506655

507656
// LCOV_EXCL_START: ignore program failure
508657
default:
@@ -812,7 +961,7 @@ SparseCSFIndex::SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
812961
const std::vector<int64_t>& indptr_offsets,
813962
const std::vector<int64_t>& indices_offsets,
814963
const std::vector<int64_t>& axis_order)
815-
: SparseIndexBase(indices->shape()[0] - indices_offsets.back()),
964+
: SparseIndexBase(indices->size() - indices_offsets.back()),
816965
indptr_(indptr),
817966
indices_(indices),
818967
indptr_offsets_(indptr_offsets),

cpp/src/arrow/sparse_tensor_test.cc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -982,4 +982,32 @@ TEST_F(TestSparseCSFTensor, TestToTensor) {
982982

983983
ASSERT_TRUE(tensor.Equals(*dense_tensor));
984984
}
985+
986+
// Round trip: dense Tensor -> SparseCSFTensor -> dense Tensor must reproduce
// the original tensor, and the sparse tensor must carry over the dim names.
TEST_F(TestSparseCSFTensor, CreationFromTensor) {
  // 108 values for a 3 x 3 x 3 x 4 tensor; the eight non-zero entries are 1..8.
  std::vector<int64_t> dense_values = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
  const std::vector<int64_t> dense_shape = {3, 3, 3, 4};
  const std::vector<std::string> axis_names = {"a", "b", "c", "d"};
  Tensor tensor(int64(), Buffer::Wrap(dense_values), dense_shape, {}, axis_names);

  // Dense -> CSF.
  std::shared_ptr<SparseCSFTensor> sparse;
  ASSERT_OK(SparseCSFTensor::Make(tensor, &sparse));

  ASSERT_EQ(8, sparse->non_zero_length());
  ASSERT_TRUE(sparse->is_mutable());

  // Dimension names survive the conversion, both in bulk and per axis.
  ASSERT_EQ(axis_names, sparse->dim_names());
  ASSERT_EQ("a", sparse->dim_name(0));
  ASSERT_EQ("b", sparse->dim_name(1));
  ASSERT_EQ("c", sparse->dim_name(2));
  ASSERT_EQ("d", sparse->dim_name(3));

  // CSF -> dense.
  std::shared_ptr<Tensor> round_tripped;
  ASSERT_OK(sparse->ToTensor(&round_tripped));
  ASSERT_TRUE(tensor.Equals(*round_tripped));
}
9851013
} // namespace arrow

0 commit comments

Comments
 (0)