Skip to content
32 changes: 32 additions & 0 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,32 @@ TEST_F(TestArray, TestMakeArrayOfNullUnion) {
}
}

// Test helper: appends `scalar` and a null scalar of the same type through
// every AppendScalar/AppendScalars entry point, then checks the finished
// array slot by slot.
//
// Append sequence (slot index -> content):
//   0 valid, 1 valid, 2 null            (three single appends)
//   3 valid, 4 null                     (AppendScalars)
//   5 valid, 6 valid                    (valid scalar, n_repeats = 2)
//   7 null,  8 null                     (null scalar, n_repeats = 2)
void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar) {
  const auto missing = MakeNullScalar(scalar->type);

  std::unique_ptr<arrow::ArrayBuilder> builder;
  ASSERT_OK(MakeBuilder(pool, scalar->type, &builder));

  // One scalar at a time.
  ASSERT_OK(builder->AppendScalar(*scalar));
  ASSERT_OK(builder->AppendScalar(*scalar));
  ASSERT_OK(builder->AppendScalar(*missing));
  // A vector of scalars in one call.
  ASSERT_OK(builder->AppendScalars({scalar, missing}));
  // One scalar repeated.
  ASSERT_OK(builder->AppendScalar(*scalar, /*n_repeats=*/2));
  ASSERT_OK(builder->AppendScalar(*missing, /*n_repeats=*/2));

  std::shared_ptr<Array> result;
  FinishAndCheckPadding(builder.get(), &result);
  ASSERT_OK(result->ValidateFull());
  ASSERT_EQ(result->length(), 9);
  ASSERT_EQ(result->null_count(), 4);

  // Slots that must hold a value equal to the input scalar.
  for (const int position : {0, 1, 3, 5, 6}) {
    ASSERT_FALSE(result->IsNull(position));
    ASSERT_OK_AND_ASSIGN(auto actual, result->GetScalar(position));
    AssertScalarsEqual(*scalar, *actual, /*verbose=*/true);
  }
  // Slots that must be null.
  for (const int position : {2, 4, 7, 8}) {
    ASSERT_TRUE(result->IsNull(position));
  }
}

TEST_F(TestArray, TestMakeArrayFromScalar) {
ASSERT_OK_AND_ASSIGN(auto null_array, MakeArrayFromScalar(NullScalar(), 5));
ASSERT_OK(null_array->ValidateFull());
Expand Down Expand Up @@ -447,6 +473,10 @@ TEST_F(TestArray, TestMakeArrayFromScalar) {
ASSERT_EQ(array->null_count(), 0);
}
}

for (auto scalar : scalars) {
AssertAppendScalar(pool_, scalar);
}
}

TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) {
Expand Down Expand Up @@ -481,6 +511,8 @@ TEST_F(TestArray, TestMakeArrayFromMapScalar) {
ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i));
ASSERT_TRUE(item->Equals(scalar));
}

AssertAppendScalar(pool_, std::make_shared<MapScalar>(scalar));
}

TEST_F(TestArray, ValidateBuffersPrimitive) {
Expand Down
159 changes: 159 additions & 0 deletions cpp/src/arrow/array/builder_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/buffer.h"
#include "arrow/builder.h"
#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/util/logging.h"
#include "arrow/visitor_inline.h"

namespace arrow {

Expand Down Expand Up @@ -92,6 +95,162 @@ Status ArrayBuilder::Advance(int64_t elements) {
return null_bitmap_builder_.Advance(elements);
}

namespace {
// Visitor that appends the scalars in [scalars_begin_, scalars_end_) to
// builder_, repeating the whole range n_repeats_ times.
//
// Convert() dispatches on the type of the first scalar in the range; each
// Visit overload then casts builder_ and every scalar to the concrete classes
// for that type. The struct is an aggregate and is brace-initialised in member
// order: {scalars_begin_, scalars_end_, n_repeats_, builder_}.
struct AppendScalarImpl {
  // Fixed-width values (types with a C type, decimals, fixed-size binary):
  // reserve every slot up front, then append without further capacity checks.
  template <typename T>
  enable_if_t<has_c_type<T>::value || is_decimal_type<T>::value ||
                  is_fixed_size_binary_type<T>::value,
              Status>
  Visit(const T&) {
    auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
    RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));

    for (int64_t i = 0; i < n_repeats_; i++) {
      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
           raw++) {
        auto scalar =
            internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
        if (scalar->is_valid) {
          builder->UnsafeAppend(scalar->value);
        } else {
          builder->UnsafeAppendNull();
        }
      }
    }
    return Status::OK();
  }

  // Variable-length binary/string values: a first pass sums the byte sizes of
  // the valid scalars so that both the slots and the value data can be reserved
  // once before the unchecked appends.
  template <typename T>
  enable_if_base_binary<T, Status> Visit(const T&) {
    int64_t data_size = 0;
    for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
         raw++) {
      auto scalar =
          internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
      if (scalar->is_valid) {
        data_size += scalar->value->size();
      }
    }

    auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
    RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
    RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size));

    for (int64_t i = 0; i < n_repeats_; i++) {
      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
           raw++) {
        auto scalar =
            internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
        if (scalar->is_valid) {
          builder->UnsafeAppend(util::string_view{*scalar->value});
        } else {
          builder->UnsafeAppendNull();
        }
      }
    }
    return Status::OK();
  }

  // List-like values: reserve room in the child builder for every element of
  // every valid list, then start a new list slot per valid scalar and append
  // its elements one by one through the child builder's AppendScalar.
  template <typename T>
  enable_if_list_like<T, Status> Visit(const T&) {
    auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
    int64_t num_children = 0;
    for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
         raw++) {
      if (!(*raw)->is_valid) continue;
      num_children += internal::checked_cast<const BaseListScalar&>(**raw).value->length();
    }
    RETURN_NOT_OK(builder->value_builder()->Reserve(num_children * n_repeats_));

    // Distinct names for the repeat counter, the range cursor, the child index
    // and the child scalar: none of them hides another.
    for (int64_t repeat = 0; repeat < n_repeats_; repeat++) {
      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
           raw++) {
        if ((*raw)->is_valid) {
          RETURN_NOT_OK(builder->Append());
          const Array& list = *internal::checked_cast<const BaseListScalar&>(**raw).value;
          for (int64_t child_index = 0; child_index < list.length(); child_index++) {
            ARROW_ASSIGN_OR_RAISE(auto child, list.GetScalar(child_index));
            RETURN_NOT_OK(builder->value_builder()->AppendScalar(*child));
          }
        } else {
          RETURN_NOT_OK(builder_->AppendNull());
        }
      }
    }
    return Status::OK();
  }

  // Struct values: every field builder receives one entry per struct scalar
  // (a null when the struct is null or the field's scalar pointer is empty),
  // then the struct builder records the validity of the struct itself.
  Status Visit(const StructType& type) {
    auto* builder = internal::checked_cast<StructBuilder*>(builder_);
    auto count = n_repeats_ * (scalars_end_ - scalars_begin_);
    RETURN_NOT_OK(builder->Reserve(count));
    for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
      RETURN_NOT_OK(builder->field_builder(field_index)->Reserve(count));
    }
    for (int64_t i = 0; i < n_repeats_; i++) {
      for (const std::shared_ptr<Scalar>* s = scalars_begin_; s != scalars_end_; s++) {
        const auto& scalar = internal::checked_cast<const StructScalar&>(**s);
        for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
          if (!scalar.is_valid || !scalar.value[field_index]) {
            RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull());
          } else {
            RETURN_NOT_OK(builder->field_builder(field_index)
                              ->AppendScalar(*scalar.value[field_index]));
          }
        }
        RETURN_NOT_OK(builder->Append(scalar.is_valid));
      }
    }
    return Status::OK();
  }

  // Fallback for every type without a dedicated overload above.
  Status Visit(const DataType& type) {
    return Status::NotImplemented("AppendScalar for type ", type);
  }

  // Entry point. An empty range appends nothing; the guard also keeps the
  // dispatch below from dereferencing a one-past-the-end pointer.
  Status Convert() {
    if (scalars_begin_ == scalars_end_) return Status::OK();
    return VisitTypeInline(*(*scalars_begin_)->type, this);
  }

  // Half-open range of scalars to append; held by pointer, not copied.
  const std::shared_ptr<Scalar>* scalars_begin_;
  const std::shared_ptr<Scalar>* scalars_end_;
  // Number of times the whole range is appended.
  int64_t n_repeats_;
  // Destination builder; each Visit overload casts it to the concrete builder
  // class for the visited type.
  ArrayBuilder* builder_;
};
} // namespace

// Appends `scalar` exactly once.
//
// Delegates to the n_repeats overload so that the type check and its error
// message live in a single place. Returns Status::Invalid when the scalar's
// type differs from the builder's type, and otherwise whatever the type
// dispatch returns.
Status ArrayBuilder::AppendScalar(const Scalar& scalar) {
  return AppendScalar(scalar, /*n_repeats=*/1);
}

// Appends `scalar` to this builder `n_repeats` times.
//
// Returns Status::Invalid when the scalar's type differs from the builder's
// type; otherwise returns the result of the type-dispatched append.
Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
  const bool types_match = scalar.type->Equals(type());
  if (!types_match) {
    return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
                           " to builder for type ", type()->ToString());
  }

  // AppendScalarImpl walks a range of shared_ptr<Scalar>, so wrap the borrowed
  // reference in a shared_ptr whose deleter does nothing: the caller keeps
  // ownership of `scalar`.
  const std::shared_ptr<Scalar> borrowed(const_cast<Scalar*>(&scalar), [](Scalar*) {});
  AppendScalarImpl impl{&borrowed, &borrowed + 1, n_repeats, this};
  return impl.Convert();
}

// Appends every scalar of `scalars`, in order, to this builder.
//
// An empty vector is a no-op. Every scalar's type is checked against the
// builder's type before anything is appended; the first mismatch yields
// Status::Invalid.
Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
  const std::shared_ptr<Scalar>* const first = scalars.data();
  const std::shared_ptr<Scalar>* const last = first + scalars.size();
  if (first == last) {
    return Status::OK();
  }

  const auto expected = type();
  for (const std::shared_ptr<Scalar>* it = first; it != last; ++it) {
    const Scalar& candidate = **it;
    if (candidate.type->Equals(expected)) continue;
    return Status::Invalid("Cannot append scalar of type ", candidate.type->ToString(),
                           " to builder for type ", type()->ToString());
  }

  AppendScalarImpl impl{first, last, /*n_repeats=*/1, this};
  return impl.Convert();
}

Status ArrayBuilder::Finish(std::shared_ptr<Array>* out) {
std::shared_ptr<ArrayData> internal_data;
RETURN_NOT_OK(FinishInternal(&internal_data));
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/array/builder_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ class ARROW_EXPORT ArrayBuilder {
/// This method is useful when appending null values to a parent nested type.
virtual Status AppendEmptyValues(int64_t length) = 0;

/// \brief Append a value from a scalar
///
/// \param[in] scalar the value to append; its type must equal the builder's type
/// \return Status::Invalid if the types differ, Status::NotImplemented if
/// appending scalars of this type is not supported
Status AppendScalar(const Scalar& scalar);
/// \brief Append a value from a scalar, repeated n_repeats times
///
/// \param[in] scalar the value to append; its type must equal the builder's type
/// \param[in] n_repeats how many times the value is appended
/// \return Status::Invalid if the types differ, Status::NotImplemented if
/// appending scalars of this type is not supported
Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
/// \brief Append each scalar of a vector, in order
///
/// An empty vector appends nothing and returns OK.
///
/// \param[in] scalars the values to append; every type must equal the builder's type
/// \return Status::Invalid if any type differs, Status::NotImplemented if
/// appending scalars of this type is not supported
Status AppendScalars(const ScalarVector& scalars);

/// For cases where raw data was memcpy'd into the internal buffers, allows us
/// to advance the length of the builder. It is your responsibility to use
/// this function responsibly.
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/arrow/array/builder_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,14 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
return Status::OK();
}

/// \brief Append the contents of a buffer as one value
///
/// Reserves room for one more element, then appends the buffer viewed as a
/// string_view.
Status Append(const Buffer& s) {
  ARROW_RETURN_NOT_OK(Reserve(1));
  const util::string_view bytes(s);
  UnsafeAppend(bytes);
  return Status::OK();
}

/// \brief Append the contents of the pointed-to buffer as one value
///
/// `s` is dereferenced unconditionally, so it must not be null.
Status Append(const std::shared_ptr<Buffer>& s) {
  const Buffer& buffer = *s;
  return Append(buffer);
}

template <size_t NBYTES>
Status Append(const std::array<uint8_t, NBYTES>& value) {
ARROW_RETURN_NOT_OK(Reserve(1));
Expand Down Expand Up @@ -502,6 +510,10 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
UnsafeAppend(reinterpret_cast<const uint8_t*>(value.data()));
}

void UnsafeAppend(const Buffer& s) { UnsafeAppend(util::string_view(s)); }

void UnsafeAppend(const std::shared_ptr<Buffer>& s) { UnsafeAppend(*s); }

void UnsafeAppendNull() {
UnsafeAppendToBitmap(false);
byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/array/builder_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "arrow/array/builder_primitive.h" // IWYU pragma: export
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
Expand Down
90 changes: 9 additions & 81 deletions cpp/src/arrow/testing/generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,88 +95,16 @@ std::shared_ptr<arrow::Array> ConstantArrayGenerator::String(int64_t size,
return ConstantArray<StringType>(size, value);
}

// Visitor that converts a non-empty vector of scalars into ArrayData. The
// data type is taken from the first scalar; Convert() is the entry point and
// is rvalue-qualified, so an instance is used for a single conversion.
struct ScalarVectorToArrayImpl {
  // Runs a freshly constructed builder of T's builder type over scalars_:
  // valid scalars go through `append`, invalid ones become nulls. The finished
  // data is stored in data_.
  template <typename T, typename AppendScalar,
            typename BuilderType = typename TypeTraits<T>::BuilderType,
            typename ScalarType = typename TypeTraits<T>::ScalarType>
  Status UseBuilder(const AppendScalar& append) {
    BuilderType builder(type_, default_memory_pool());
    for (const auto& s : scalars_) {
      if (s->is_valid) {
        RETURN_NOT_OK(append(internal::checked_cast<const ScalarType&>(*s), &builder));
      } else {
        RETURN_NOT_OK(builder.AppendNull());
      }
    }
    return builder.FinishInternal(&data_);
  }

  // Appends a scalar whose `value` member can be handed to Append() directly.
  struct AppendValue {
    template <typename BuilderType, typename ScalarType>
    Status operator()(const ScalarType& s, BuilderType* builder) const {
      return builder->Append(s.value);
    }
  };

  // Appends a scalar whose `value` member points to a Buffer, viewed as a
  // string_view.
  struct AppendBuffer {
    template <typename BuilderType, typename ScalarType>
    Status operator()(const ScalarType& s, BuilderType* builder) const {
      const Buffer& buffer = *s.value;
      return builder->Append(util::string_view{buffer});
    }
  };

  template <typename T>
  enable_if_primitive_ctype<T, Status> Visit(const T&) {
    return UseBuilder<T>(AppendValue{});
  }

  template <typename T>
  enable_if_has_string_view<T, Status> Visit(const T&) {
    return UseBuilder<T>(AppendBuffer{});
  }

  // Structs are assembled column by column: for each field, gather that
  // field's scalar from every struct scalar and convert the gathered vector
  // recursively into child data.
  //
  // NOTE(review): the validity bitmap is left as nullptr and is_valid is never
  // consulted here, so null struct scalars are not represented. Confirm that
  // callers only pass valid struct scalars with fully populated `value`.
  Status Visit(const StructType& type) {
    data_ = ArrayData::Make(type_, static_cast<int64_t>(scalars_.size()),
                            {/*null_bitmap=*/nullptr});
    data_->child_data.resize(type_->num_fields());

    ScalarVector field_scalars(scalars_.size());

    for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
      for (size_t i = 0; i < scalars_.size(); ++i) {
        field_scalars[i] =
            internal::checked_cast<StructScalar*>(scalars_[i].get())->value[field_index];
      }

      ARROW_ASSIGN_OR_RAISE(data_->child_data[field_index],
                            ScalarVectorToArrayImpl{}.Convert(field_scalars));
    }
    return Status::OK();
  }

  // Fallback for every type without a dedicated overload above.
  Status Visit(const DataType& type) {
    return Status::NotImplemented("ScalarVectorToArray for type ", type);
  }

  // Converts `scalars` into ArrayData. Returns NotImplemented for an empty
  // vector, because the type is read from the first element.
  Result<std::shared_ptr<ArrayData>> Convert(const ScalarVector& scalars) && {
    if (scalars.empty()) {
      return Status::NotImplemented("ScalarVectorToArray with no scalars");
    }
    // `scalars` is a const reference, so it can only be copied. Written as a
    // plain copy rather than std::move, which would suggest a move that never
    // happens.
    scalars_ = scalars;
    type_ = scalars_[0]->type;
    RETURN_NOT_OK(VisitTypeInline(*type_, this));
    return std::move(data_);
  }

  std::shared_ptr<DataType> type_;   // type of the first scalar
  ScalarVector scalars_;             // private copy of the input scalars
  std::shared_ptr<ArrayData> data_;  // result, filled in by the Visit overloads
};

// Builds an array from a non-empty vector of scalars.
//
// The builder is created for the type of the first scalar, and
// ArrayBuilder::AppendScalars then rejects any scalar whose type differs.
// Returns NotImplemented for an empty vector, since there is no scalar to
// take the type from.
Result<std::shared_ptr<Array>> ScalarVectorToArray(const ScalarVector& scalars) {
  if (scalars.empty()) {
    return Status::NotImplemented("ScalarVectorToArray with no scalars");
  }
  std::unique_ptr<arrow::ArrayBuilder> builder;
  RETURN_NOT_OK(MakeBuilder(default_memory_pool(), scalars[0]->type, &builder));
  RETURN_NOT_OK(builder->AppendScalars(scalars));
  std::shared_ptr<Array> out;
  RETURN_NOT_OK(builder->Finish(&out));
  return out;
}

} // namespace arrow