Skip to content

Commit 82a40f3

Browse files
authored
Handle possible overflows in StringArrayBuilder / LargeStringArrayBuilder (#13802)
* test(13796): reproducer of overflow on capacity * fix(13796): handle overflows with proper max capacity number which is valid for MutableBuffer * refactor: use simple solution and provide panic
1 parent 63ce486 commit 82a40f3

File tree

1 file changed

+31
-6
lines changed

1 file changed

+31
-6
lines changed

datafusion/functions/src/strings.rs

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,12 @@ pub struct StringArrayBuilder {
124124

125125
impl StringArrayBuilder {
126126
pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
127-
let mut offsets_buffer =
128-
MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i32>());
127+
let capacity = item_capacity
128+
.checked_add(1)
129+
.map(|i| i.saturating_mul(size_of::<i32>()))
130+
.expect("capacity integer overflow");
131+
132+
let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
129133
// SAFETY: the first offset value is definitely not going to exceed the bounds.
130134
unsafe { offsets_buffer.push_unchecked(0_i32) };
131135
Self {
@@ -182,7 +186,7 @@ impl StringArrayBuilder {
182186
.len()
183187
.try_into()
184188
.expect("byte array offset overflow");
185-
unsafe { self.offsets_buffer.push_unchecked(next_offset) };
189+
self.offsets_buffer.push(next_offset);
186190
}
187191

188192
/// Finalise the builder into a concrete [`StringArray`].
@@ -289,8 +293,12 @@ pub struct LargeStringArrayBuilder {
289293

290294
impl LargeStringArrayBuilder {
291295
pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
292-
let mut offsets_buffer =
293-
MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i64>());
296+
let capacity = item_capacity
297+
.checked_add(1)
298+
.map(|i| i.saturating_mul(size_of::<i64>()))
299+
.expect("capacity integer overflow");
300+
301+
let mut offsets_buffer = MutableBuffer::with_capacity(capacity);
294302
// SAFETY: the first offset value is definitely not going to exceed the bounds.
295303
unsafe { offsets_buffer.push_unchecked(0_i64) };
296304
Self {
@@ -347,7 +355,7 @@ impl LargeStringArrayBuilder {
347355
.len()
348356
.try_into()
349357
.expect("byte array offset overflow");
350-
unsafe { self.offsets_buffer.push_unchecked(next_offset) };
358+
self.offsets_buffer.push(next_offset);
351359
}
352360

353361
/// Finalise the builder into a concrete [`LargeStringArray`].
@@ -452,3 +460,20 @@ impl ColumnarValueRef<'_> {
452460
}
453461
}
454462
}
463+
464+
#[cfg(test)]
465+
mod tests {
466+
use super::*;
467+
468+
#[test]
469+
#[should_panic(expected = "capacity integer overflow")]
470+
fn test_overflow_string_array_builder() {
471+
let _builder = StringArrayBuilder::with_capacity(usize::MAX, usize::MAX);
472+
}
473+
474+
#[test]
475+
#[should_panic(expected = "capacity integer overflow")]
476+
fn test_overflow_large_string_array_builder() {
477+
let _builder = LargeStringArrayBuilder::with_capacity(usize::MAX, usize::MAX);
478+
}
479+
}

0 commit comments

Comments
 (0)