Skip to content

Commit 2f96204

Browse files
samuelerescarluvatonfriendlymatthewjhorstmann
authored
Adding try_append_value implementation to ByteViewBuilder (#8594)
# Which issue does this PR close? - Partial fix for apache/datafusion#17857 # Rationale for this change These changes add a safer version of `append_value` in `ByteViewBuilder` that handles panics called `try_append_value`. Datafusions will consume the API and handle the Result coming back from the function. # What changes are included in this PR? # Are these changes tested? The method is already covered by existing tests. # Are there any user-facing changes? No breaking changes, as the original `append_value` method hasn't changed. --------- Co-authored-by: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Co-authored-by: Matthew Kim <38759997+friendlymatthew@users.noreply.github.com> Co-authored-by: Jörn Horstmann <git@jhorstmann.net>
1 parent f131b54 commit 2f96204

File tree

1 file changed

+30
-5
lines changed

1 file changed

+30
-5
lines changed

arrow-array/src/builder/generic_bytes_view_builder.rs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -306,15 +306,30 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
306306
/// - String length exceeds `u32::MAX`
307307
#[inline]
308308
pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
309+
self.try_append_value(value).unwrap()
310+
}
311+
312+
/// Appends a value into the builder
313+
///
314+
/// # Errors
315+
///
316+
/// Returns an error if:
317+
/// - String buffer count exceeds `u32::MAX`
318+
/// - String length exceeds `u32::MAX`
319+
#[inline]
320+
pub fn try_append_value(&mut self, value: impl AsRef<T::Native>) -> Result<(), ArrowError> {
309321
let v: &[u8] = value.as_ref().as_ref();
310-
let length: u32 = v.len().try_into().unwrap();
322+
let length: u32 = v.len().try_into().map_err(|_| {
323+
ArrowError::InvalidArgumentError(format!("String length {} exceeds u32::MAX", v.len()))
324+
})?;
325+
311326
if length <= MAX_INLINE_VIEW_LEN {
312327
let mut view_buffer = [0; 16];
313328
view_buffer[0..4].copy_from_slice(&length.to_le_bytes());
314329
view_buffer[4..4 + v.len()].copy_from_slice(v);
315330
self.views_buffer.push(u128::from_le_bytes(view_buffer));
316331
self.null_buffer_builder.append_non_null();
317-
return;
332+
return Ok(());
318333
}
319334

320335
// Deduplication if:
@@ -339,7 +354,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
339354
self.views_buffer.push(self.views_buffer[*idx]);
340355
self.null_buffer_builder.append_non_null();
341356
self.string_tracker = Some((ht, hasher));
342-
return;
357+
return Ok(());
343358
}
344359
Entry::Vacant(vacant) => {
345360
// o.w. we insert the (string hash -> view index)
@@ -356,17 +371,28 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
356371
let to_reserve = v.len().max(self.block_size.next_size() as usize);
357372
self.in_progress.reserve(to_reserve);
358373
};
374+
359375
let offset = self.in_progress.len() as u32;
360376
self.in_progress.extend_from_slice(v);
361377

378+
let buffer_index: u32 = self.completed.len().try_into().map_err(|_| {
379+
ArrowError::InvalidArgumentError(format!(
380+
"Buffer count {} exceeds u32::MAX",
381+
self.completed.len()
382+
))
383+
})?;
384+
362385
let view = ByteView {
363386
length,
387+
// This won't panic as we checked the length of prefix earlier.
364388
prefix: u32::from_le_bytes(v[0..4].try_into().unwrap()),
365-
buffer_index: self.completed.len() as u32,
389+
buffer_index,
366390
offset,
367391
};
368392
self.views_buffer.push(view.into());
369393
self.null_buffer_builder.append_non_null();
394+
395+
Ok(())
370396
}
371397

372398
/// Append an `Option` value into the builder
@@ -581,7 +607,6 @@ mod tests {
581607
use core::str;
582608

583609
use super::*;
584-
use crate::Array;
585610

586611
#[test]
587612
fn test_string_view_deduplicate() {

0 commit comments

Comments
 (0)