Skip to content

Commit

Permalink
Unsafe improvements: arrow/{array_reader,buffer}.
Browse files Browse the repository at this point in the history
  • Loading branch information
veluca93 committed Jul 8, 2024
1 parent 8355823 commit 06bcf8d
Show file tree
Hide file tree
Showing 8 changed files with 25 additions and 2 deletions.
6 changes: 6 additions & 0 deletions parquet/src/arrow/array_reader/fixed_len_byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ impl ArrayReader for FixedLenByteArrayReader {
.add_buffer(Buffer::from_vec(record_data.buffer))
.null_bit_buffer(self.record_reader.consume_bitmap_buffer());

// SAFETY: Assuming that a RecordReader produces a valid FixedLenByteArrayBuffer (as
// otherwise that code itself would be unsound), this call is safe if self.byte_length
// matches the byte length of the FixedLenByteArrayBuffer produced by the RecordReader
// matches the byte length passed as a type parameter to ArrayDataBuilder.
// FIXME: how do we know that the byte lengths match? What if record_data.byte_length
// is None?
let binary = FixedSizeBinaryArray::from(unsafe { array_data.build_unchecked() });

// TODO: An improvement might be to do this conversion on read
Expand Down
1 change: 1 addition & 0 deletions parquet/src/arrow/array_reader/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ impl ArrayReader for FixedSizeListArrayReader {
list_builder = list_builder.null_bit_buffer(Some(builder.into()));
}

// SAFETY: FIXME: document why this call is safe.
let list_data = unsafe { list_builder.build_unchecked() };

let result_array = FixedSizeListArray::from(list_data);
Expand Down
1 change: 1 addition & 0 deletions parquet/src/arrow/array_reader/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
data_builder = data_builder.null_bit_buffer(Some(builder.into()))
}

// SAFETY: FIXME: document why this call is safe.
let list_data = unsafe { data_builder.build_unchecked() };

let result_array = GenericListArray::<OffsetSize>::from(list_data);
Expand Down
2 changes: 1 addition & 1 deletion parquet/src/arrow/array_reader/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ impl ArrayReader for MapArrayReader {
let data = array.to_data();
let builder = data.into_builder().data_type(self.data_type.clone());

// SAFETY - we can assume that ListArrayReader produces valid ListArray
// SAFETY: we can assume that ListArrayReader produces valid ListArray
// of the expected type, and as such its output can be reinterpreted as
// a MapArray without validation
Ok(Arc::new(MapArray::from(unsafe {
Expand Down
5 changes: 5 additions & 0 deletions parquet/src/arrow/array_reader/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ where
.add_buffer(record_data)
.null_bit_buffer(self.record_reader.consume_bitmap_buffer());

// SAFETY: Assuming that the RecordReader's null buffer is sufficiently sized for the
// corresponding Vec<T> buffer (or None), this is safe, as we have a single buffer
// obtained from a vector of `T`s, which must therefore have a valid memory representation
// for T. The type mapping above ensures that a valid T results in a valid Arrow array of
// type `arrow_data_type`.
let array_data = unsafe { array_data.build_unchecked() };
let array: ArrayRef = match T::get_physical_type() {
PhysicalType::BOOLEAN => Arc::new(BooleanArray::from(array_data)),
Expand Down
1 change: 1 addition & 0 deletions parquet/src/arrow/array_reader/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ impl ArrayReader for StructArrayReader {
array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
}

// SAFETY: FIXME: document why this call is safe.
let array_data = unsafe { array_data_builder.build_unchecked() };
Ok(Arc::new(StructArray::from(array_data)))
}
Expand Down
4 changes: 4 additions & 0 deletions parquet/src/arrow/buffer/dictionary_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ impl<K: ArrowNativeType + Ord, V: OffsetSizeTrait> DictionaryBuffer<K, V> {

let data = match cfg!(debug_assertions) {
true => builder.build().unwrap(),
// SAFETY: FIXME: this is unsound. data_type is passed by the caller without
// any validation, which might result in creating invalid arrays, and this is a
// safe function which cannot have preconditions. Similar considerations apply
// to null_buffer.
false => unsafe { builder.build_unchecked() },
};

Expand Down
7 changes: 6 additions & 1 deletion parquet/src/arrow/buffer/offset_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ impl<I: OffsetSizeTrait> OffsetBuffer<I> {

let data = match cfg!(debug_assertions) {
true => array_data_builder.build().unwrap(),
// SAFETY: FIXME: this is unsound. data_type is passed by the caller without
// any validation, which might result in creating invalid arrays, and this is a
// safe function which cannot have preconditions. Similar considerations apply
// to null_buffer.
false => unsafe { array_data_builder.build_unchecked() },
};

Expand All @@ -164,7 +168,8 @@ impl<I: OffsetSizeTrait> OffsetBuffer<I> {
let len = (end - start).to_usize().unwrap();

if len != 0 {
// Safety: (1) the buffer is valid (2) the offsets are valid (3) the values in between are of ByteViewType
// SAFETY: (1) the buffer is valid (2) the offsets are valid (3) the values in between are of ByteViewType.
// This follows from `self` being itself valid.
unsafe {
builder.append_view_unchecked(block, start.as_usize() as u32, len as u32);
}
Expand Down

0 comments on commit 06bcf8d

Please sign in to comment.