Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions c_glib/arrow-glib/array-builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,7 @@ garrow_array_builder_append_nulls(GArrowArrayBuilder *builder,

auto arrow_builder =
static_cast<BUILDER>(garrow_array_builder_get_raw(builder));
uint8_t valid_bytes[n];
memset(valid_bytes, 0, sizeof(uint8_t) * n);
auto status = arrow_builder->AppendNulls(valid_bytes, n);
auto status = arrow_builder->AppendNulls(n);
return garrow_error_check(error, status, context);
}

Expand Down
43 changes: 32 additions & 11 deletions cpp/src/arrow/array-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,11 @@ void TestPrimitiveBuilder<PBoolean>::Check(const std::unique_ptr<BooleanBuilder>
ASSERT_EQ(draws_[i] != 0, actual) << i;
}
}
ASSERT_TRUE(result->Equals(*expected));
AssertArraysEqual(*result, *expected);

// buffers are correctly sized
ASSERT_EQ(result->data()->buffers[0]->size(), BitUtil::BytesForBits(size));
ASSERT_EQ(result->data()->buffers[1]->size(), BitUtil::BytesForBits(size));

// Builder is now reset
ASSERT_EQ(0, builder->length());
Expand Down Expand Up @@ -518,15 +522,13 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {

TYPED_TEST(TestPrimitiveBuilder, TestAppendNulls) {
const int64_t size = 10;
const uint8_t valid_bytes[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};

ASSERT_OK(this->builder_->AppendNulls(valid_bytes, size));
ASSERT_OK(this->builder_->AppendNulls(size));

std::shared_ptr<Array> result;
FinishAndCheckPadding(this->builder_.get(), &result);

for (int64_t i = 0; i < size; ++i) {
ASSERT_EQ(result->IsValid(i), static_cast<bool>(valid_bytes[i]));
ASSERT_FALSE(result->IsValid(i));
}
}

Expand Down Expand Up @@ -922,6 +924,27 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity());
}

TEST(TestBooleanBuilder, AppendNullsAdvanceBuilder) {
BooleanBuilder builder;

std::vector<uint8_t> values = {1, 0, 0, 1};
std::vector<uint8_t> is_valid = {1, 1, 0, 1};

std::shared_ptr<Array> arr;
ASSERT_OK(builder.AppendValues(values.data(), 2));
ASSERT_OK(builder.AppendNulls(1));
ASSERT_OK(builder.AppendValues(values.data() + 3, 1));
ASSERT_OK(builder.Finish(&arr));

ASSERT_EQ(1, arr->null_count());

const auto& barr = static_cast<const BooleanArray&>(*arr);
ASSERT_TRUE(barr.Value(0));
ASSERT_FALSE(barr.Value(1));
ASSERT_TRUE(barr.IsNull(2));
ASSERT_TRUE(barr.Value(3));
}

TEST(TestBooleanBuilder, TestStdBoolVectorAppend) {
BooleanBuilder builder;
BooleanBuilder builder_nn;
Expand Down Expand Up @@ -1391,13 +1414,12 @@ TEST_F(TestAdaptiveIntBuilder, TestAppendNull) {

TEST_F(TestAdaptiveIntBuilder, TestAppendNulls) {
constexpr int64_t size = 10;
const uint8_t valid_bytes[size] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
ASSERT_OK(builder_->AppendNulls(valid_bytes, size));
ASSERT_OK(builder_->AppendNulls(size));

Done();

for (unsigned index = 0; index < size; ++index) {
ASSERT_EQ(result_->IsValid(index), static_cast<bool>(valid_bytes[index]));
ASSERT_FALSE(result_->IsValid(index));
}
}

Expand Down Expand Up @@ -1526,13 +1548,12 @@ TEST_F(TestAdaptiveUIntBuilder, TestAppendNull) {

TEST_F(TestAdaptiveUIntBuilder, TestAppendNulls) {
constexpr int64_t size = 10;
const uint8_t valid_bytes[size] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
ASSERT_OK(builder_->AppendNulls(valid_bytes, size));
ASSERT_OK(builder_->AppendNulls(size));

Done();

for (unsigned index = 0; index < size; ++index) {
ASSERT_EQ(result_->IsValid(index), static_cast<bool>(valid_bytes[index]));
ASSERT_FALSE(result_->IsValid(index));
}
}

Expand Down
7 changes: 4 additions & 3 deletions cpp/src/arrow/array/builder_adaptive.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
public:
explicit AdaptiveIntBuilderBase(MemoryPool* pool);

/// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
Status AppendNulls(const uint8_t* valid_bytes, int64_t length) {
/// \brief Append multiple nulls
/// \param[in] length the number of nulls to append
Status AppendNulls(int64_t length) {
ARROW_RETURN_NOT_OK(CommitPendingData());
ARROW_RETURN_NOT_OK(Reserve(length));
memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
UnsafeAppendToBitmap(valid_bytes, length);
UnsafeSetNull(length);
return Status::OK();
}

Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/array/builder_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,10 @@ void ArrayBuilder::UnsafeSetNotNull(int64_t length) {
null_bitmap_builder_.UnsafeAppend(length, true);
}

void ArrayBuilder::UnsafeSetNull(int64_t length) {
length_ += length;
null_count_ += length;
null_bitmap_builder_.UnsafeAppend(length, false);
}

} // namespace arrow
2 changes: 2 additions & 0 deletions cpp/src/arrow/array/builder_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ class ARROW_EXPORT ArrayBuilder {
// Set the next length bits to not null (i.e. valid).
void UnsafeSetNotNull(int64_t length);

void UnsafeSetNull(int64_t length);

static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);

static Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) {
Expand Down
169 changes: 14 additions & 155 deletions cpp/src/arrow/array/builder_primitive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,104 +45,8 @@ Status NullBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
return Status::OK();
}

// ----------------------------------------------------------------------

template <typename T>
void PrimitiveBuilder<T>::Reset() {
data_.reset();
raw_data_ = nullptr;
}

template <typename T>
Status PrimitiveBuilder<T>::Resize(int64_t capacity) {
RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
capacity = std::max(capacity, kMinBuilderCapacity);

int64_t nbytes = TypeTraits<T>::bytes_required(capacity);
if (capacity_ == 0) {
RETURN_NOT_OK(AllocateResizableBuffer(pool_, nbytes, &data_));
} else {
RETURN_NOT_OK(data_->Resize(nbytes));
}

raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
return ArrayBuilder::Resize(capacity);
}

template <typename T>
Status PrimitiveBuilder<T>::AppendValues(const value_type* values, int64_t length,
const uint8_t* valid_bytes) {
RETURN_NOT_OK(Reserve(length));

if (length > 0) {
std::memcpy(raw_data_ + length_, values,
static_cast<std::size_t>(TypeTraits<T>::bytes_required(length)));
}

// length_ is update by these
ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
return Status::OK();
}

template <typename T>
Status PrimitiveBuilder<T>::AppendValues(const value_type* values, int64_t length,
const std::vector<bool>& is_valid) {
RETURN_NOT_OK(Reserve(length));
DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));

if (length > 0) {
std::memcpy(raw_data_ + length_, values,
static_cast<std::size_t>(TypeTraits<T>::bytes_required(length)));
}

// length_ is update by these
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
return Status::OK();
}

template <typename T>
Status PrimitiveBuilder<T>::AppendValues(const std::vector<value_type>& values,
const std::vector<bool>& is_valid) {
return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
}

template <typename T>
Status PrimitiveBuilder<T>::AppendValues(const std::vector<value_type>& values) {
return AppendValues(values.data(), static_cast<int64_t>(values.size()));
}

template <typename T>
Status PrimitiveBuilder<T>::FinishInternal(std::shared_ptr<ArrayData>* out) {
RETURN_NOT_OK(TrimBuffer(TypeTraits<T>::bytes_required(length_), data_.get()));
std::shared_ptr<Buffer> null_bitmap;
RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
*out = ArrayData::Make(type_, length_, {null_bitmap, data_}, null_count_);

data_ = nullptr;
capacity_ = length_ = null_count_ = 0;

return Status::OK();
}

template class PrimitiveBuilder<UInt8Type>;
template class PrimitiveBuilder<UInt16Type>;
template class PrimitiveBuilder<UInt32Type>;
template class PrimitiveBuilder<UInt64Type>;
template class PrimitiveBuilder<Int8Type>;
template class PrimitiveBuilder<Int16Type>;
template class PrimitiveBuilder<Int32Type>;
template class PrimitiveBuilder<Int64Type>;
template class PrimitiveBuilder<Date32Type>;
template class PrimitiveBuilder<Date64Type>;
template class PrimitiveBuilder<Time32Type>;
template class PrimitiveBuilder<Time64Type>;
template class PrimitiveBuilder<TimestampType>;
template class PrimitiveBuilder<HalfFloatType>;
template class PrimitiveBuilder<FloatType>;
template class PrimitiveBuilder<DoubleType>;

BooleanBuilder::BooleanBuilder(MemoryPool* pool)
: ArrayBuilder(boolean(), pool), data_(nullptr), raw_data_(nullptr) {}
: ArrayBuilder(boolean(), pool), data_builder_(pool) {}

BooleanBuilder::BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
: BooleanBuilder(pool) {
Expand All @@ -151,57 +55,23 @@ BooleanBuilder::BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool

void BooleanBuilder::Reset() {
ArrayBuilder::Reset();
data_.reset();
raw_data_ = nullptr;
data_builder_.Reset();
}

Status BooleanBuilder::Resize(int64_t capacity) {
RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
capacity = std::max(capacity, kMinBuilderCapacity);

const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity);
if (capacity_ == 0) {
RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_bitmap_size, &data_));
raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());

// We zero the memory for booleans to keep things simple; for some reason if
// we do not, even though we may write every bit (through in-place | or &),
// valgrind will still show a warning. If we do not zero the bytes here, we
// will have to be careful to zero them in AppendNull and AppendNulls. Also,
// zeroing the bits results in deterministic bits when each byte may have a
// mix of nulls and not nulls.
//
// We only zero up to new_bitmap_size because the padding was zeroed by
// AllocateResizableBuffer
memset(raw_data_, 0, static_cast<size_t>(new_bitmap_size));
} else {
const int64_t old_bitmap_capacity = data_->capacity();
RETURN_NOT_OK(data_->Resize(new_bitmap_size));
const int64_t new_bitmap_capacity = data_->capacity();
raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());

// See comment above about why we zero memory for booleans
memset(raw_data_ + old_bitmap_capacity, 0,
static_cast<size_t>(new_bitmap_capacity - old_bitmap_capacity));
}

RETURN_NOT_OK(data_builder_.Resize(capacity));
return ArrayBuilder::Resize(capacity);
}

Status BooleanBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
int64_t bit_offset = length_ % 8;
if (bit_offset > 0) {
// Adjust last byte
data_->mutable_data()[length_ / 8] &= BitUtil::kPrecedingBitmask[bit_offset];
}

std::shared_ptr<Buffer> null_bitmap;
std::shared_ptr<Buffer> data, null_bitmap;
RETURN_NOT_OK(data_builder_.Finish(&data));
RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), data_.get()));

*out = ArrayData::Make(boolean(), length_, {null_bitmap, data_}, null_count_);
*out = ArrayData::Make(boolean(), length_, {null_bitmap, data}, null_count_);

data_ = nullptr;
capacity_ = length_ = null_count_ = 0;
return Status::OK();
}
Expand All @@ -211,10 +81,8 @@ Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
RETURN_NOT_OK(Reserve(length));

int64_t i = 0;
internal::GenerateBitsUnrolled(raw_data_, length_, length,
[values, &i]() -> bool { return values[i++] != 0; });

// this updates length_
data_builder_.UnsafeAppend<false>(length,
[values, &i]() -> bool { return values[i++] != 0; });
ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
return Status::OK();
}
Expand All @@ -223,12 +91,9 @@ Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
const std::vector<bool>& is_valid) {
RETURN_NOT_OK(Reserve(length));
DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));

int64_t i = 0;
internal::GenerateBitsUnrolled(raw_data_, length_, length,
[values, &i]() -> bool { return values[i++]; });

// this updates length_
data_builder_.UnsafeAppend<false>(length,
[values, &i]() -> bool { return values[i++]; });
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
return Status::OK();
}
Expand All @@ -247,25 +112,19 @@ Status BooleanBuilder::AppendValues(const std::vector<bool>& values,
const int64_t length = static_cast<int64_t>(values.size());
RETURN_NOT_OK(Reserve(length));
DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));

int64_t i = 0;
internal::GenerateBitsUnrolled(raw_data_, length_, length,
[&values, &i]() -> bool { return values[i++]; });

// this updates length_
data_builder_.UnsafeAppend<false>(length,
[&values, &i]() -> bool { return values[i++]; });
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
return Status::OK();
}

Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {
const int64_t length = static_cast<int64_t>(values.size());
RETURN_NOT_OK(Reserve(length));

int64_t i = 0;
internal::GenerateBitsUnrolled(raw_data_, length_, length,
[&values, &i]() -> bool { return values[i++]; });

// this updates length_
data_builder_.UnsafeAppend<false>(length,
[&values, &i]() -> bool { return values[i++]; });
ArrayBuilder::UnsafeSetNotNull(length);
return Status::OK();
}
Expand Down
Loading