Skip to content

Commit 5bdaeaf

Browse files
Segfault in ByteGroupValueBuilder (#15968)
* test to demonstrate segfault in ByteGroupValueBuilder * check for offset overflow * clippy
1 parent f12ba60 commit 5bdaeaf

File tree

1 file changed

+33
-0
lines changed
  • datafusion/physical-plan/src/aggregates/group_values/multi_group_by

1 file changed

+33
-0
lines changed

datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ where
5050
offsets: Vec<O>,
5151
/// Nulls
5252
nulls: MaybeNullBufferBuilder,
53+
/// The maximum size of the buffer for `0`
54+
max_buffer_size: usize,
5355
}
5456

5557
impl<O> ByteGroupValueBuilder<O>
@@ -62,6 +64,11 @@ where
6264
buffer: BufferBuilder::new(INITIAL_BUFFER_CAPACITY),
6365
offsets: vec![O::default()],
6466
nulls: MaybeNullBufferBuilder::new(),
67+
max_buffer_size: if O::IS_LARGE {
68+
i64::MAX as usize
69+
} else {
70+
i32::MAX as usize
71+
},
6572
}
6673
}
6774

@@ -187,6 +194,13 @@ where
187194
{
188195
let value: &[u8] = array.value(row).as_ref();
189196
self.buffer.append_slice(value);
197+
198+
assert!(
199+
self.buffer.len() <= self.max_buffer_size,
200+
"offset overflow, buffer size > {}",
201+
self.max_buffer_size
202+
);
203+
190204
self.offsets.push(O::usize_as(self.buffer.len()));
191205
}
192206

@@ -318,6 +332,7 @@ where
318332
mut buffer,
319333
offsets,
320334
nulls,
335+
..
321336
} = *self;
322337

323338
let null_buffer = nulls.build();
@@ -410,6 +425,24 @@ mod tests {
410425

411426
use super::GroupColumn;
412427

428+
#[test]
429+
#[should_panic]
430+
fn test_byte_group_value_builder_overflow() {
431+
let mut builder = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8);
432+
433+
let large_string = "a".repeat(1024 * 1024);
434+
435+
let array =
436+
Arc::new(StringArray::from(vec![Some(large_string.as_str())])) as ArrayRef;
437+
438+
// Append items until our buffer length is 1 + i32::MAX as usize
439+
for _ in 0..2048 {
440+
builder.append_val(&array, 0);
441+
}
442+
443+
assert_eq!(builder.value(2047), large_string.as_bytes());
444+
}
445+
413446
#[test]
414447
fn test_byte_take_n() {
415448
let mut builder = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8);

0 commit comments

Comments
 (0)