|
50 | 50 | offsets: Vec<O>,
|
51 | 51 | /// Nulls
|
52 | 52 | nulls: MaybeNullBufferBuilder,
|
| 53 | + /// The maximum size of the buffer for offset type `O` |
| 54 | + max_buffer_size: usize, |
53 | 55 | }
|
54 | 56 |
|
55 | 57 | impl<O> ByteGroupValueBuilder<O>
|
|
62 | 64 | buffer: BufferBuilder::new(INITIAL_BUFFER_CAPACITY),
|
63 | 65 | offsets: vec![O::default()],
|
64 | 66 | nulls: MaybeNullBufferBuilder::new(),
|
| 67 | + max_buffer_size: if O::IS_LARGE { |
| 68 | + i64::MAX as usize |
| 69 | + } else { |
| 70 | + i32::MAX as usize |
| 71 | + }, |
65 | 72 | }
|
66 | 73 | }
|
67 | 74 |
|
@@ -187,6 +194,13 @@ where
|
187 | 194 | {
|
188 | 195 | let value: &[u8] = array.value(row).as_ref();
|
189 | 196 | self.buffer.append_slice(value);
|
| 197 | + |
| 198 | + assert!( |
| 199 | + self.buffer.len() <= self.max_buffer_size, |
| 200 | + "offset overflow, buffer size > {}", |
| 201 | + self.max_buffer_size |
| 202 | + ); |
| 203 | + |
190 | 204 | self.offsets.push(O::usize_as(self.buffer.len()));
|
191 | 205 | }
|
192 | 206 |
|
@@ -318,6 +332,7 @@ where
|
318 | 332 | mut buffer,
|
319 | 333 | offsets,
|
320 | 334 | nulls,
|
| 335 | + .. |
321 | 336 | } = *self;
|
322 | 337 |
|
323 | 338 | let null_buffer = nulls.build();
|
@@ -410,6 +425,24 @@ mod tests {
|
410 | 425 |
|
411 | 426 | use super::GroupColumn;
|
412 | 427 |
|
| 428 | + #[test] |
| 429 | + #[should_panic] |
| 430 | + fn test_byte_group_value_builder_overflow() { |
| 431 | + let mut builder = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8); |
| 432 | + |
| 433 | + let large_string = "a".repeat(1024 * 1024); |
| 434 | + |
| 435 | + let array = |
| 436 | + Arc::new(StringArray::from(vec![Some(large_string.as_str())])) as ArrayRef; |
| 437 | + |
| 438 | + // Append items until our buffer length is 1 + i32::MAX as usize |
| 439 | + for _ in 0..2048 { |
| 440 | + builder.append_val(&array, 0); |
| 441 | + } |
| 442 | + |
| 443 | + assert_eq!(builder.value(2047), large_string.as_bytes()); |
| 444 | + } |
| 445 | + |
413 | 446 | #[test]
|
414 | 447 | fn test_byte_take_n() {
|
415 | 448 | let mut builder = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8);
|
|
0 commit comments