Skip to content

Commit 6c1e99d

Browse files
committed
Add TypedBufferBuilder<bool> UnsafeAppend compile-time option to not track falses. Restore faster code from before this patch for appending C arrays and vector<bool>
Change-Id: I01d2d8e87db520a5c38f892a58e86d7a645e8877
1 parent 09d2bfe commit 6c1e99d

File tree

3 files changed

+52
-20
lines changed

3 files changed

+52
-20
lines changed

cpp/src/arrow/array/builder_primitive.cc

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,24 @@ Status BooleanBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
156156

157157
Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
158158
const uint8_t* valid_bytes) {
159-
return AppendValues(values, values + length, valid_bytes);
159+
RETURN_NOT_OK(Reserve(length));
160+
161+
int64_t i = 0;
162+
data_builder_.UnsafeAppend<false>(length,
163+
[values, &i]() -> bool { return values[i++] != 0; });
164+
ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
165+
return Status::OK();
160166
}
161167

162168
Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
163169
const std::vector<bool>& is_valid) {
164-
return AppendValues(values, values + length, is_valid.begin());
170+
RETURN_NOT_OK(Reserve(length));
171+
DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
172+
int64_t i = 0;
173+
data_builder_.UnsafeAppend<false>(length,
174+
[values, &i]() -> bool { return values[i++]; });
175+
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
176+
return Status::OK();
165177
}
166178

167179
Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values,
@@ -175,11 +187,24 @@ Status BooleanBuilder::AppendValues(const std::vector<uint8_t>& values) {
175187

176188
Status BooleanBuilder::AppendValues(const std::vector<bool>& values,
177189
const std::vector<bool>& is_valid) {
178-
return AppendValues(values.begin(), values.end(), is_valid.begin());
190+
const int64_t length = static_cast<int64_t>(values.size());
191+
RETURN_NOT_OK(Reserve(length));
192+
DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
193+
int64_t i = 0;
194+
data_builder_.UnsafeAppend<false>(length,
195+
[&values, &i]() -> bool { return values[i++]; });
196+
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
197+
return Status::OK();
179198
}
180199

181200
Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {
182-
return AppendValues(values.begin(), values.end());
201+
const int64_t length = static_cast<int64_t>(values.size());
202+
RETURN_NOT_OK(Reserve(length));
203+
int64_t i = 0;
204+
data_builder_.UnsafeAppend<false>(length,
205+
[&values, &i]() -> bool { return values[i++]; });
206+
ArrayBuilder::UnsafeSetNotNull(length);
207+
return Status::OK();
183208
}
184209

185210
} // namespace arrow

cpp/src/arrow/array/builder_primitive.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ class ARROW_EXPORT NumericBuilder : public ArrayBuilder {
143143
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
144144
ARROW_RETURN_NOT_OK(Reserve(length));
145145
data_builder_.UnsafeAppend(values_begin, values_end);
146-
null_bitmap_builder_.UnsafeAppend(
146+
null_bitmap_builder_.UnsafeAppend<true>(
147147
length, [&valid_begin]() -> bool { return *valid_begin++; });
148148
length_ = null_bitmap_builder_.length();
149149
null_count_ = null_bitmap_builder_.false_count();
@@ -161,7 +161,7 @@ class ARROW_EXPORT NumericBuilder : public ArrayBuilder {
161161
if (valid_begin == NULLPTR) {
162162
UnsafeSetNotNull(length);
163163
} else {
164-
null_bitmap_builder_.UnsafeAppend(
164+
null_bitmap_builder_.UnsafeAppend<true>(
165165
length, [&valid_begin]() -> bool { return *valid_begin++; });
166166
length_ = null_bitmap_builder_.length();
167167
null_count_ = null_bitmap_builder_.false_count();
@@ -309,8 +309,8 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
309309
Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
310310
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
311311
ARROW_RETURN_NOT_OK(Reserve(length));
312-
data_builder_.UnsafeAppend(length,
313-
[&values_begin]() -> bool { return *values_begin++; });
312+
data_builder_.UnsafeAppend<false>(
313+
length, [&values_begin]() -> bool { return *values_begin++; });
314314
// this updates length_
315315
UnsafeSetNotNull(length);
316316
return Status::OK();
@@ -331,9 +331,9 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
331331
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
332332
ARROW_RETURN_NOT_OK(Reserve(length));
333333

334-
data_builder_.UnsafeAppend(length,
335-
[&values_begin]() -> bool { return *values_begin++; });
336-
null_bitmap_builder_.UnsafeAppend(
334+
data_builder_.UnsafeAppend<false>(
335+
length, [&values_begin]() -> bool { return *values_begin++; });
336+
null_bitmap_builder_.UnsafeAppend<true>(
337337
length, [&valid_begin]() -> bool { return *valid_begin++; });
338338
length_ = null_bitmap_builder_.length();
339339
null_count_ = null_bitmap_builder_.false_count();
@@ -346,13 +346,13 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
346346
ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
347347
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
348348
ARROW_RETURN_NOT_OK(Reserve(length));
349-
data_builder_.UnsafeAppend(length,
350-
[&values_begin]() -> bool { return *values_begin++; });
349+
data_builder_.UnsafeAppend<false>(
350+
length, [&values_begin]() -> bool { return *values_begin++; });
351351

352352
if (valid_begin == NULLPTR) {
353353
UnsafeSetNotNull(length);
354354
} else {
355-
null_bitmap_builder_.UnsafeAppend(
355+
null_bitmap_builder_.UnsafeAppend<true>(
356356
length, [&valid_begin]() -> bool { return *valid_begin++; });
357357
}
358358
length_ = null_bitmap_builder_.length();

cpp/src/arrow/buffer-builder.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <cstring>
2525
#include <memory>
2626
#include <string>
27+
#include <utility>
2728

2829
#include "arrow/buffer.h"
2930
#include "arrow/status.h"
@@ -307,14 +308,20 @@ class TypedBufferBuilder<bool> {
307308
bit_length_ += num_copies;
308309
}
309310

310-
template <typename Generator>
311+
template <bool count_falses, typename Generator>
311312
void UnsafeAppend(const int64_t num_elements, Generator&& gen) {
312313
if (num_elements == 0) return;
313-
internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
314-
bool value = gen();
315-
false_count_ += !value;
316-
return value;
317-
});
314+
315+
if (count_falses) {
316+
internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
317+
bool value = gen();
318+
false_count_ += !value;
319+
return value;
320+
});
321+
} else {
322+
internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements,
323+
std::forward<Generator>(gen));
324+
}
318325
bit_length_ += num_elements;
319326
}
320327

0 commit comments

Comments
 (0)