|
17 | 17 |
|
18 | 18 | use std::mem::size_of; |
19 | 19 |
|
| 20 | +use arrow::alloc::ALIGNMENT; |
20 | 21 | use arrow::array::{ |
21 | 22 | make_view, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ByteView, |
22 | 23 | GenericStringArray, LargeStringArray, OffsetSizeTrait, StringArray, StringViewArray, |
@@ -124,8 +125,18 @@ pub struct StringArrayBuilder { |
124 | 125 |
|
125 | 126 | impl StringArrayBuilder { |
126 | 127 | pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { |
127 | | - let mut offsets_buffer = |
128 | | - MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i32>()); |
| 128 | + // MutableBuffer::with_capacity will round up to multiples of `ALIGNMENT` |
| 129 | + let capacity = std::cmp::max( |
| 130 | + item_capacity.saturating_add(1).saturating_mul(ALIGNMENT), |
| 131 | + data_capacity, |
| 132 | + ); |
| 133 | + // rust core's Layout::from_size_align is capped at isize::Max |
| 134 | + let capacity = std::cmp::min( |
| 135 | + capacity, |
| 136 | + isize::MAX as usize - (isize::MAX as usize % ALIGNMENT), |
| 137 | + ); |
| 138 | + |
| 139 | + let mut offsets_buffer = MutableBuffer::with_capacity(capacity); |
129 | 140 | // SAFETY: the first offset value is definitely not going to exceed the bounds. |
130 | 141 | unsafe { offsets_buffer.push_unchecked(0_i32) }; |
131 | 142 | Self { |
@@ -289,8 +300,18 @@ pub struct LargeStringArrayBuilder { |
289 | 300 |
|
290 | 301 | impl LargeStringArrayBuilder { |
291 | 302 | pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { |
292 | | - let mut offsets_buffer = |
293 | | - MutableBuffer::with_capacity((item_capacity + 1) * size_of::<i64>()); |
| 303 | + // MutableBuffer::with_capacity will round up to multiples of `ALIGNMENT` |
| 304 | + let capacity = std::cmp::max( |
| 305 | + item_capacity.saturating_add(1).saturating_mul(ALIGNMENT), |
| 306 | + data_capacity, |
| 307 | + ); |
| 308 | + // rust core's Layout::from_size_align is capped at isize::Max |
| 309 | + let capacity = std::cmp::min( |
| 310 | + capacity, |
| 311 | + isize::MAX as usize - (isize::MAX as usize % ALIGNMENT), |
| 312 | + ); |
| 313 | + |
| 314 | + let mut offsets_buffer = MutableBuffer::with_capacity(capacity); |
294 | 315 | // SAFETY: the first offset value is definitely not going to exceed the bounds. |
295 | 316 | unsafe { offsets_buffer.push_unchecked(0_i64) }; |
296 | 317 | Self { |
@@ -455,11 +476,50 @@ impl ColumnarValueRef<'_> { |
455 | 476 |
|
456 | 477 | #[cfg(test)] |
457 | 478 | mod tests { |
| 479 | + use std::alloc::{GlobalAlloc, Layout, System}; |
| 480 | + |
458 | 481 | use super::*; |
459 | 482 |
|
| 483 | + #[derive(Default, Copy, Clone)] |
| 484 | + pub struct MockAllocator; |
| 485 | + |
| 486 | + unsafe impl GlobalAlloc for MockAllocator { |
| 487 | + /// only need to check it's valid |
| 488 | + #[inline] |
| 489 | + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { |
| 490 | + if layout.size() >= 9223372036854775744 { |
| 491 | + // system allocator will abort in such a way that is not detectable in tests |
| 492 | + // instead, throw a panic we can detect |
| 493 | + panic!("tried to allocate 9223372036854775744 bytes"); |
| 494 | + } else { |
| 495 | + System.alloc(layout) |
| 496 | + } |
| 497 | + } |
| 498 | + |
| 499 | + #[inline] |
| 500 | + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { |
| 501 | + System.dealloc(ptr, layout) |
| 502 | + } |
| 503 | + } |
| 504 | + |
| 505 | + #[global_allocator] |
| 506 | + static ALLOCATOR: MockAllocator = MockAllocator; |
| 507 | + |
460 | 508 | #[test] |
461 | | - fn test_overflow_string_array_builders() { |
| 509 | + #[should_panic(expected = "tried to allocate 9223372036854775744 bytes")] |
| 510 | + fn test_overflow_string_array_builder() { |
| 511 | + // should successfully handle overflow |
| 512 | + // while providing a number that passes upstream checks |
| 513 | + // and will then panic (by trying to allocate too much) |
462 | 514 | let _builder = StringArrayBuilder::with_capacity(usize::MAX, usize::MAX); |
| 515 | + } |
| 516 | + |
| 517 | + #[test] |
| 518 | + #[should_panic(expected = "tried to allocate 9223372036854775744 bytes")] |
| 519 | + fn test_overflow_large_string_array_builder() { |
| 520 | + // should successfully handle overflow |
| 521 | + // while providing a number that passes upstream checks |
| 522 | + // and will then panic (by trying to allocate too much) |
463 | 523 | let _builder = LargeStringArrayBuilder::with_capacity(usize::MAX, usize::MAX); |
464 | 524 | } |
465 | 525 | } |
0 commit comments