Skip to content

Commit 18f90fb

Browse files
authored
ARROW-1945: [C++] Add data_capacity_ to track capacity of value data
data_capacity_ represents the indicated (requested) capacity of value_data_builder_, and it is always smaller than or equal to the actual capacity of the underlying value_data_builder_ (data_capacity_ <= value_data_builder_.capacity()). That is because, when ReserveData(capacity) is called, the new capacity is max(data_capacity_, data length + capacity); data_capacity_ is set equal to this new capacity, but the underlying buffer size is set to BitUtil::RoundUpToMultipleOf64(new capacity) to ensure that the capacity of the buffer is a multiple of 64 bytes, as defined in Layout.md. That is why data_capacity_ is needed to report the indicated capacity of the BinaryBuilder, just as ArrayBuilder::capacity_ reports the indicated capacity of the ArrayBuilder. A safety check is added in BinaryBuilder::Append() to update data_capacity_ if the data length is greater than data_capacity_. The reason is that data_capacity_ is otherwise updated only in ReserveData(). If users mistakenly append more data than they reserved, the data length might become larger than data_capacity_ (while still satisfying data length <= actual capacity of the underlying value_data_builder_). If this happens, data_capacity_ is set equal to the data length to avoid confusion.
1 parent bbc6527 commit 18f90fb

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

cpp/src/arrow/builder.cc

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1208,7 +1208,7 @@ ArrayBuilder* ListBuilder::value_builder() const {
12081208
// String and binary
12091209

12101210
BinaryBuilder::BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
1211-
: ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
1211+
: ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool), data_capacity_(0) {}
12121212

12131213
BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}
12141214

@@ -1227,11 +1227,15 @@ Status BinaryBuilder::Resize(int64_t capacity) {
12271227
}
12281228

12291229
Status BinaryBuilder::ReserveData(int64_t capacity) {
1230-
if(value_data_length() + capacity > std::numeric_limits<int32_t>::max()) {
1230+
if (value_data_length() + capacity > data_capacity_) {
1231+
if (value_data_length() + capacity > std::numeric_limits<int32_t>::max()) {
12311232
return Status::Invalid("Cannot reserve capacity larger than 2^31 - 1 in length for binary data");
1232-
}
1233-
1234-
return value_data_builder_.Resize(value_data_length() + capacity);
1233+
}
1234+
1235+
RETURN_NOT_OK(value_data_builder_.Resize(value_data_length() + capacity));
1236+
data_capacity_ = value_data_length() + capacity;
1237+
}
1238+
return Status::OK();
12351239
}
12361240

12371241
Status BinaryBuilder::AppendNextOffset() {
@@ -1249,6 +1253,9 @@ Status BinaryBuilder::Append(const uint8_t* value, int32_t length) {
12491253
RETURN_NOT_OK(Reserve(1));
12501254
RETURN_NOT_OK(AppendNextOffset());
12511255
RETURN_NOT_OK(value_data_builder_.Append(value, length));
1256+
if (data_capacity_ < value_data_length()) {
1257+
data_capacity_ = value_data_length();
1258+
}
12521259
UnsafeAppendToBitmap(true);
12531260
return Status::OK();
12541261
}

0 commit comments

Comments
 (0)