Skip to content

Commit fc9269e

Browse files
committed
Use finish allocation
1 parent 53b3430 commit fc9269e

File tree

2 files changed

+29
-4
lines changed

2 files changed

+29
-4
lines changed

arrow-array/src/builder/generic_bytes_view_builder.rs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ pub struct GenericByteViewBuilder<T: ByteViewType + ?Sized> {
100100
/// map `<string hash> -> <index to the views>`
101101
string_tracker: Option<(HashTable<usize>, ahash::RandomState)>,
102102
phantom: PhantomData<T>,
103+
/// Capacity, in elements, reserved for the views and null builders that `finish` creates to replace the finished ones.
104+
///
105+
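/// Defaults to `None`: `finish` then uses `BufferBuilder::default()` for the views and `NullBufferBuilder::new(len)` for the nulls.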
106+
initial_capacity: Option<usize>,
103107
}
104108

105109
impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
@@ -121,9 +125,16 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
121125
},
122126
string_tracker: None,
123127
phantom: Default::default(),
128+
initial_capacity: None,
124129
}
125130
}
126131

132+
/// Set the capacity, in elements, of the fresh views and null builders installed each time `finish` is called
133+
pub fn with_initial_capacity(mut self, initial_capacity: usize) -> Self {
134+
self.initial_capacity = Some(initial_capacity);
135+
self
136+
}
137+
127138
/// Set the target buffer load factor for appending views from existing arrays
128139
///
129140
/// Defaults to 50% if not set.
@@ -398,8 +409,18 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
398409
self.flush_in_progress();
399410
let completed = std::mem::take(&mut self.completed);
400411
let len = self.views_builder.len();
401-
let views = ScalarBuffer::new(self.views_builder.finish(), 0, len);
402-
let nulls = self.null_buffer_builder.finish();
412+
let (mut views_builder, mut null_buffer_builder) = match self.initial_capacity {
413+
Some(initial_capacity) => (
414+
BufferBuilder::new(initial_capacity),
415+
NullBufferBuilder::new(initial_capacity),
416+
),
417+
None => (BufferBuilder::default(), NullBufferBuilder::new(len)),
418+
};
419+
std::mem::swap(&mut views_builder, &mut self.views_builder);
420+
std::mem::swap(&mut null_buffer_builder, &mut self.null_buffer_builder);
421+
422+
let views = ScalarBuffer::new(views_builder.finish(), 0, len);
423+
let nulls = null_buffer_builder.finish();
403424
if let Some((ref mut ht, _)) = self.string_tracker.as_mut() {
404425
ht.clear();
405426
}

arrow-select/src/incremental_batch_builder.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,8 +341,12 @@ fn instantiate_builder(data_type: &DataType, batch_size: usize) -> ArrayBuilderI
341341
/// with https://github.com/apache/datafusion/pull/16208
342342
fn instantiate_builder(data_type: &DataType, batch_size: usize) -> ArrayBuilderImpl {
343343
match data_type {
344-
DataType::Utf8View => Box::new(StringViewBuilder::with_capacity(batch_size)),
345-
DataType::BinaryView => Box::new(BinaryViewBuilder::with_capacity(batch_size)),
344+
DataType::Utf8View => {
345+
Box::new(StringViewBuilder::with_capacity(batch_size).with_initial_capacity(batch_size))
346+
}
347+
DataType::BinaryView => {
348+
Box::new(BinaryViewBuilder::with_capacity(batch_size).with_initial_capacity(batch_size))
349+
}
346350

347351
// Default to using the generic builder for all other types
348352
//

0 commit comments

Comments
 (0)