Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ include = [
"LICENSE.txt",
"NOTICE.txt",
]
edition = "2021"
rust-version = "1.84"
edition = "2024"
rust-version = "1.85"

[workspace.dependencies]
arrow = { version = "56.2.0", path = "./arrow", default-features = false }
Expand Down
5 changes: 4 additions & 1 deletion arrow-arith/src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,10 @@ mod tests {

// `multiply` overflows on this case.
let err = mul(&a, &b).unwrap_err();
assert_eq!(err.to_string(), "Arithmetic overflow: Overflow happened on: 123456789000000000000000000 * 10000000000000000000");
assert_eq!(
err.to_string(),
"Arithmetic overflow: Overflow happened on: 123456789000000000000000000 * 10000000000000000000"
);

// Avoid overflow by reducing the scale.
let result = multiply_fixed_point(&a, &b, 28).unwrap();
Expand Down
7 changes: 5 additions & 2 deletions arrow-arith/src/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ fn timestamp_op<T: TimestampOp>(
"Invalid timestamp arithmetic operation: {} {op} {}",
l.data_type(),
r.data_type()
)))
)));
}
};
Ok(Arc::new(array.with_timezone_opt(l.timezone())))
Expand Down Expand Up @@ -1263,7 +1263,10 @@ mod tests {
.with_precision_and_scale(37, 37)
.unwrap();
let err = mul(&a, &b).unwrap_err().to_string();
assert_eq!(err, "Invalid argument error: Output scale of Decimal128(3, 3) * Decimal128(37, 37) would exceed max scale of 38");
assert_eq!(
err,
"Invalid argument error: Output scale of Decimal128(3, 3) * Decimal128(37, 37) would exceed max scale of 38"
);

let a = Decimal128Array::from(vec![1])
.with_precision_and_scale(3, -2)
Expand Down
8 changes: 4 additions & 4 deletions arrow-array/src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,13 +420,13 @@ native_type_float_op!(
1.,
unsafe {
// Need to allow in clippy because
// current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0`
// current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0`
#[allow(unnecessary_transmutes)]
std::mem::transmute(-1_i32)
},
unsafe {
// Need to allow in clippy because
// current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0`
// current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0`
#[allow(unnecessary_transmutes)]
std::mem::transmute(i32::MAX)
}
Expand All @@ -437,13 +437,13 @@ native_type_float_op!(
1.,
unsafe {
// Need to allow in clippy because
// current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0`
// current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0`
#[allow(unnecessary_transmutes)]
std::mem::transmute(-1_i64)
},
unsafe {
// Need to allow in clippy because
// current MSRV (Minimum Supported Rust Version) is `1.84.0` but this item is stable since `1.87.0`
// current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0`
#[allow(unnecessary_transmutes)]
std::mem::transmute(i64::MAX)
}
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/array/binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
&'a self,
indexes: impl Iterator<Item = Option<usize>> + 'a,
) -> impl Iterator<Item = Option<&'a [u8]>> {
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
unsafe { indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) }
}
}

Expand Down
6 changes: 3 additions & 3 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ impl BooleanArray {
/// # Safety
/// This doesn't check bounds, the caller must ensure that index < self.len()
pub unsafe fn value_unchecked(&self, i: usize) -> bool {
self.values.value_unchecked(i)
unsafe { self.values.value_unchecked(i) }
}

/// Returns the boolean value at index `i`.
Expand Down Expand Up @@ -222,7 +222,7 @@ impl BooleanArray {
&'a self,
indexes: impl Iterator<Item = Option<usize>> + 'a,
) -> impl Iterator<Item = Option<bool>> + 'a {
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
unsafe { indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) }
}

/// Create a [`BooleanArray`] by evaluating the operation for
Expand Down Expand Up @@ -355,7 +355,7 @@ impl ArrayAccessor for &BooleanArray {
}

unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
BooleanArray::value_unchecked(self, index)
unsafe { BooleanArray::value_unchecked(self, index) }
}
}

Expand Down
63 changes: 37 additions & 26 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,28 +283,30 @@ impl<T: ByteArrayType> GenericByteArray<T> {
/// # Safety
/// Caller is responsible for ensuring that the index is within the bounds of the array
pub unsafe fn value_unchecked(&self, i: usize) -> &T::Native {
let end = *self.value_offsets().get_unchecked(i + 1);
let start = *self.value_offsets().get_unchecked(i);

// Soundness
// pointer alignment & location is ensured by RawPtrBox
// buffer bounds/offset is ensured by the value_offset invariants

// Safety of `to_isize().unwrap()`
// `start` and `end` are &OffsetSize, which is a generic type that implements the
// OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
// both of which should cleanly cast to isize on an architecture that supports
// 32/64-bit offsets
let b = std::slice::from_raw_parts(
self.value_data
.as_ptr()
.offset(start.to_isize().unwrap_unchecked()),
(end - start).to_usize().unwrap_unchecked(),
);

// SAFETY:
// ArrayData is valid
T::Native::from_bytes_unchecked(b)
unsafe {
let end = *self.value_offsets().get_unchecked(i + 1);
let start = *self.value_offsets().get_unchecked(i);

// Soundness
// pointer alignment & location is ensured by RawPtrBox
// buffer bounds/offset is ensured by the value_offset invariants

// Safety of `to_isize().unwrap()`
// `start` and `end` are &OffsetSize, which is a generic type that implements the
// OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
// both of which should cleanly cast to isize on an architecture that supports
// 32/64-bit offsets
let b = std::slice::from_raw_parts(
self.value_data
.as_ptr()
.offset(start.to_isize().unwrap_unchecked()),
(end - start).to_usize().unwrap_unchecked(),
);

// SAFETY:
// ArrayData is valid
T::Native::from_bytes_unchecked(b)
}
}

/// Returns the element at index `i`
Expand Down Expand Up @@ -509,7 +511,7 @@ impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray<T> {
}

unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
GenericByteArray::value_unchecked(self, index)
unsafe { GenericByteArray::value_unchecked(self, index) }
}
}

Expand Down Expand Up @@ -603,14 +605,23 @@ mod tests {
let nulls = NullBuffer::new_null(3);
let err =
StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone())).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3");
assert_eq!(
err.to_string(),
"Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3"
);

let err = BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3");
assert_eq!(
err.to_string(),
"Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3"
);

let non_utf8_data = Buffer::from_slice_ref(b"he\xFFloworld");
let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2");
assert_eq!(
err.to_string(),
"Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2"
);

BinaryArray::new(offsets, non_utf8_data, None);

Expand Down
133 changes: 72 additions & 61 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -324,17 +324,19 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// Caller is responsible for ensuring that the index is within the bounds
/// of the array
pub unsafe fn value_unchecked(&self, idx: usize) -> &T::Native {
let v = self.views.get_unchecked(idx);
let len = *v as u32;
let b = if len <= MAX_INLINE_VIEW_LEN {
Self::inline_value(v, len as usize)
} else {
let view = ByteView::from(*v);
let data = self.buffers.get_unchecked(view.buffer_index as usize);
let offset = view.offset as usize;
data.get_unchecked(offset..offset + len as usize)
};
T::Native::from_bytes_unchecked(b)
unsafe {
let v = self.views.get_unchecked(idx);
let len = *v as u32;
let b = if len <= MAX_INLINE_VIEW_LEN {
Self::inline_value(v, len as usize)
} else {
let view = ByteView::from(*v);
let data = self.buffers.get_unchecked(view.buffer_index as usize);
let offset = view.offset as usize;
data.get_unchecked(offset..offset + len as usize)
};
T::Native::from_bytes_unchecked(b)
}
}

/// Returns the first `len` bytes the inline value of the view.
Expand All @@ -344,8 +346,10 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// - The `len` must be the length of the inlined value. It should never be larger than [`MAX_INLINE_VIEW_LEN`].
#[inline(always)]
pub unsafe fn inline_value(view: &u128, len: usize) -> &[u8] {
debug_assert!(len <= MAX_INLINE_VIEW_LEN as usize);
std::slice::from_raw_parts((view as *const u128 as *const u8).wrapping_add(4), len)
unsafe {
debug_assert!(len <= MAX_INLINE_VIEW_LEN as usize);
std::slice::from_raw_parts((view as *const u128 as *const u8).wrapping_add(4), len)
}
}

/// Constructs a new iterator for iterating over the values of this array
Expand Down Expand Up @@ -540,28 +544,30 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// into the bytes just appended at the end of `data_buf`.
#[inline(always)]
unsafe fn copy_view_to_buffer(&self, i: usize, data_buf: &mut Vec<u8>) -> u128 {
// SAFETY: `i < self.len()` ensures this is in‑bounds.
let raw_view = *self.views().get_unchecked(i);
let mut bv = ByteView::from(raw_view);

// Inline‑small views stay as‑is.
if bv.length <= MAX_INLINE_VIEW_LEN {
raw_view
} else {
// SAFETY: `bv.buffer_index` and `bv.offset..bv.offset+bv.length`
// must both lie within valid ranges for `self.buffers`.
let buffer = self.buffers.get_unchecked(bv.buffer_index as usize);
let start = bv.offset as usize;
let end = start + bv.length as usize;
let slice = buffer.get_unchecked(start..end);

// Copy out‑of‑line data into our single “0” buffer.
let new_offset = data_buf.len() as u32;
data_buf.extend_from_slice(slice);

bv.buffer_index = 0;
bv.offset = new_offset;
bv.into()
unsafe {
// SAFETY: `i < self.len()` ensures this is in‑bounds.
let raw_view = *self.views().get_unchecked(i);
let mut bv = ByteView::from(raw_view);

// Inline‑small views stay as‑is.
if bv.length <= MAX_INLINE_VIEW_LEN {
raw_view
} else {
// SAFETY: `bv.buffer_index` and `bv.offset..bv.offset+bv.length`
// must both lie within valid ranges for `self.buffers`.
let buffer = self.buffers.get_unchecked(bv.buffer_index as usize);
let start = bv.offset as usize;
let end = start + bv.length as usize;
let slice = buffer.get_unchecked(start..end);

// Copy out‑of‑line data into our single “0” buffer.
let new_offset = data_buf.len() as u32;
data_buf.extend_from_slice(slice);

bv.buffer_index = 0;
bv.offset = new_offset;
bv.into()
}
}
}

Expand Down Expand Up @@ -624,36 +630,38 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
right: &GenericByteViewArray<T>,
right_idx: usize,
) -> Ordering {
let l_view = left.views().get_unchecked(left_idx);
let l_byte_view = ByteView::from(*l_view);
unsafe {
let l_view = left.views().get_unchecked(left_idx);
let l_byte_view = ByteView::from(*l_view);

let r_view = right.views().get_unchecked(right_idx);
let r_byte_view = ByteView::from(*r_view);
let r_view = right.views().get_unchecked(right_idx);
let r_byte_view = ByteView::from(*r_view);

let l_len = l_byte_view.length;
let r_len = r_byte_view.length;
let l_len = l_byte_view.length;
let r_len = r_byte_view.length;

if l_len <= 12 && r_len <= 12 {
return Self::inline_key_fast(*l_view).cmp(&Self::inline_key_fast(*r_view));
}
if l_len <= 12 && r_len <= 12 {
return Self::inline_key_fast(*l_view).cmp(&Self::inline_key_fast(*r_view));
}

// one of the string is larger than 12 bytes,
// we then try to compare the inlined data first
// one of the string is larger than 12 bytes,
// we then try to compare the inlined data first

// Note: In theory, ByteView is only used for string which is larger than 12 bytes,
// but we can still use it to get the inlined prefix for shorter strings.
// The prefix is always the first 4 bytes of the view, for both short and long strings.
let l_inlined_be = l_byte_view.prefix.swap_bytes();
let r_inlined_be = r_byte_view.prefix.swap_bytes();
if l_inlined_be != r_inlined_be {
return l_inlined_be.cmp(&r_inlined_be);
}
// Note: In theory, ByteView is only used for string which is larger than 12 bytes,
// but we can still use it to get the inlined prefix for shorter strings.
// The prefix is always the first 4 bytes of the view, for both short and long strings.
let l_inlined_be = l_byte_view.prefix.swap_bytes();
let r_inlined_be = r_byte_view.prefix.swap_bytes();
if l_inlined_be != r_inlined_be {
return l_inlined_be.cmp(&r_inlined_be);
}

// unfortunately, we need to compare the full data
let l_full_data: &[u8] = unsafe { left.value_unchecked(left_idx).as_ref() };
let r_full_data: &[u8] = unsafe { right.value_unchecked(right_idx).as_ref() };
// unfortunately, we need to compare the full data
let l_full_data: &[u8] = left.value_unchecked(left_idx).as_ref();
let r_full_data: &[u8] = right.value_unchecked(right_idx).as_ref();

l_full_data.cmp(r_full_data)
l_full_data.cmp(r_full_data)
}
}

/// Builds a 128-bit composite key for an inline value:
Expand Down Expand Up @@ -853,7 +861,7 @@ impl<'a, T: ByteViewType + ?Sized> ArrayAccessor for &'a GenericByteViewArray<T>
}

unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
GenericByteViewArray::value_unchecked(self, index)
unsafe { GenericByteViewArray::value_unchecked(self, index) }
}
}

Expand Down Expand Up @@ -999,7 +1007,7 @@ impl BinaryViewArray {
/// # Safety
/// Caller is responsible for ensuring that items in array are utf8 data.
pub unsafe fn to_string_view_unchecked(self) -> StringViewArray {
StringViewArray::new_unchecked(self.views, self.buffers, self.nulls)
unsafe { StringViewArray::new_unchecked(self.views, self.buffers, self.nulls) }
}
}

Expand Down Expand Up @@ -1171,7 +1179,10 @@ mod tests {
builder.finish()
};
assert_eq!(array.value(0), "large payload over 12 bytes");
assert_eq!(array.value(1), "another large payload over 12 bytes that double than the first one, so that we can trigger the in_progress in builder re-created");
assert_eq!(
array.value(1),
"another large payload over 12 bytes that double than the first one, so that we can trigger the in_progress in builder re-created"
);
assert_eq!(2, array.buffers.len());
}

Expand Down
Loading
Loading