Skip to content

Commit 512195b

Browse files
authored
Merge branch 'main' into alamb/refactor_push_decoder
2 parents 12bca80 + 422da15 commit 512195b

File tree

152 files changed

+4137
-743
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

152 files changed

+4137
-743
lines changed

.github/actions/setup-builder/action.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# under the License.
1717

1818
name: Prepare Rust Builder
19-
description: 'Prepare Rust Build Environment'
19+
description: "Prepare Rust Build Environment"
2020
runs:
2121
using: "composite"
2222
steps:
@@ -53,6 +53,9 @@ runs:
5353
- name: Enable backtraces
5454
shell: bash
5555
run: echo "RUST_BACKTRACE=1" >> $GITHUB_ENV
56+
- name: Disable incremental compilation
57+
shell: bash
58+
run: echo CARGO_INCREMENTAL=0 >> $GITHUB_ENV
5659
- name: Fixup git permissions
5760
# https://github.com/actions/checkout/issues/766
5861
shell: bash

arrow-array/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ authors = { workspace = true }
2525
license = { workspace = true }
2626
keywords = { workspace = true }
2727
include = { workspace = true }
28-
edition = { workspace = true }
28+
edition = "2024"
2929
rust-version = { workspace = true }
3030

3131
[lib]

arrow-array/benches/fixed_size_list_array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
use arrow_array::{Array, FixedSizeListArray, Int32Array};
1919
use arrow_schema::Field;
2020
use criterion::*;
21-
use rand::{rng, Rng};
21+
use rand::{Rng, rng};
2222
use std::{hint, sync::Arc};
2323

2424
fn gen_fsl(len: usize, value_len: usize) -> FixedSizeListArray {

arrow-array/benches/occupancy.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use arrow_array::types::Int32Type;
1919
use arrow_array::{DictionaryArray, Int32Array};
2020
use arrow_buffer::NullBuffer;
2121
use criterion::*;
22-
use rand::{rng, Rng};
22+
use rand::{Rng, rng};
2323
use std::{hint, sync::Arc};
2424

2525
fn gen_dict(

arrow-array/benches/union_array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use arrow_array::{Array, ArrayRef, Int32Array, UnionArray};
2121
use arrow_buffer::{NullBuffer, ScalarBuffer};
2222
use arrow_schema::{DataType, Field, UnionFields};
2323
use criterion::*;
24-
use rand::{rng, Rng};
24+
use rand::{Rng, rng};
2525

2626
fn array_with_nulls() -> ArrayRef {
2727
let mut rng = rng();

arrow-array/src/arithmetic.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow_buffer::{i256, ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
18+
use arrow_buffer::{ArrowNativeType, IntervalDayTime, IntervalMonthDayNano, i256};
1919
use arrow_schema::ArrowError;
2020
use half::f16;
2121
use num_complex::ComplexFloat;
@@ -454,9 +454,7 @@ mod tests {
454454
use super::*;
455455

456456
macro_rules! assert_approx_eq {
457-
( $x: expr, $y: expr ) => {{
458-
assert_approx_eq!($x, $y, 1.0e-4)
459-
}};
457+
( $x: expr, $y: expr ) => {{ assert_approx_eq!($x, $y, 1.0e-4) }};
460458
( $x: expr, $y: expr, $tol: expr ) => {{
461459
let x_val = $x;
462460
let y_val = $y;

arrow-array/src/array/binary_array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
9090
&'a self,
9191
indexes: impl Iterator<Item = Option<usize>> + 'a,
9292
) -> impl Iterator<Item = Option<&'a [u8]>> {
93-
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
93+
unsafe { indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) }
9494
}
9595
}
9696

arrow-array/src/array/boolean_array.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use crate::array::print_long_array;
1919
use crate::builder::BooleanBuilder;
2020
use crate::iterator::BooleanIter;
2121
use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
22-
use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
22+
use arrow_buffer::{BooleanBuffer, Buffer, MutableBuffer, NullBuffer, bit_util};
2323
use arrow_data::{ArrayData, ArrayDataBuilder};
2424
use arrow_schema::DataType;
2525
use std::any::Any;
@@ -184,7 +184,7 @@ impl BooleanArray {
184184
/// # Safety
185185
/// This doesn't check bounds, the caller must ensure that index < self.len()
186186
pub unsafe fn value_unchecked(&self, i: usize) -> bool {
187-
self.values.value_unchecked(i)
187+
unsafe { self.values.value_unchecked(i) }
188188
}
189189

190190
/// Returns the boolean value at index `i`.
@@ -222,7 +222,7 @@ impl BooleanArray {
222222
&'a self,
223223
indexes: impl Iterator<Item = Option<usize>> + 'a,
224224
) -> impl Iterator<Item = Option<bool>> + 'a {
225-
indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
225+
indexes.map(|opt_index| opt_index.map(|index| unsafe { self.value_unchecked(index) }))
226226
}
227227

228228
/// Create a [`BooleanArray`] by evaluating the operation for
@@ -355,7 +355,7 @@ impl ArrayAccessor for &BooleanArray {
355355
}
356356

357357
unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
358-
BooleanArray::value_unchecked(self, index)
358+
unsafe { BooleanArray::value_unchecked(self, index) }
359359
}
360360
}
361361

@@ -486,7 +486,7 @@ impl From<BooleanBuffer> for BooleanArray {
486486
mod tests {
487487
use super::*;
488488
use arrow_buffer::Buffer;
489-
use rand::{rng, Rng};
489+
use rand::{Rng, rng};
490490

491491
#[test]
492492
fn test_boolean_fmt_debug() {

arrow-array/src/array/byte_array.rs

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
use crate::array::{get_offsets, print_long_array};
1919
use crate::builder::GenericByteBuilder;
2020
use crate::iterator::ArrayIter;
21-
use crate::types::bytes::ByteArrayNativeType;
2221
use crate::types::ByteArrayType;
22+
use crate::types::bytes::ByteArrayNativeType;
2323
use crate::{Array, ArrayAccessor, ArrayRef, OffsetSizeTrait, Scalar};
2424
use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
2525
use arrow_buffer::{NullBuffer, OffsetBuffer};
@@ -283,8 +283,8 @@ impl<T: ByteArrayType> GenericByteArray<T> {
283283
/// # Safety
284284
/// Caller is responsible for ensuring that the index is within the bounds of the array
285285
pub unsafe fn value_unchecked(&self, i: usize) -> &T::Native {
286-
let end = *self.value_offsets().get_unchecked(i + 1);
287-
let start = *self.value_offsets().get_unchecked(i);
286+
let end = *unsafe { self.value_offsets().get_unchecked(i + 1) };
287+
let start = *unsafe { self.value_offsets().get_unchecked(i) };
288288

289289
// Soundness
290290
// pointer alignment & location is ensured by RawPtrBox
@@ -295,16 +295,18 @@ impl<T: ByteArrayType> GenericByteArray<T> {
295295
// OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
296296
// both of which should cleanly cast to isize on an architecture that supports
297297
// 32/64-bit offsets
298-
let b = std::slice::from_raw_parts(
299-
self.value_data
300-
.as_ptr()
301-
.offset(start.to_isize().unwrap_unchecked()),
302-
(end - start).to_usize().unwrap_unchecked(),
303-
);
298+
let b = unsafe {
299+
std::slice::from_raw_parts(
300+
self.value_data
301+
.as_ptr()
302+
.offset(start.to_isize().unwrap_unchecked()),
303+
(end - start).to_usize().unwrap_unchecked(),
304+
)
305+
};
304306

305307
// SAFETY:
306308
// ArrayData is valid
307-
T::Native::from_bytes_unchecked(b)
309+
unsafe { T::Native::from_bytes_unchecked(b) }
308310
}
309311

310312
/// Returns the element at index `i`
@@ -509,7 +511,7 @@ impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray<T> {
509511
}
510512

511513
unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
512-
GenericByteArray::value_unchecked(self, index)
514+
unsafe { GenericByteArray::value_unchecked(self, index) }
513515
}
514516
}
515517

@@ -603,14 +605,23 @@ mod tests {
603605
let nulls = NullBuffer::new_null(3);
604606
let err =
605607
StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone())).unwrap_err();
606-
assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3");
608+
assert_eq!(
609+
err.to_string(),
610+
"Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3"
611+
);
607612

608613
let err = BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err();
609-
assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3");
614+
assert_eq!(
615+
err.to_string(),
616+
"Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3"
617+
);
610618

611619
let non_utf8_data = Buffer::from_slice_ref(b"he\xFFloworld");
612620
let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None).unwrap_err();
613-
assert_eq!(err.to_string(), "Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2");
621+
assert_eq!(
622+
err.to_string(),
623+
"Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2"
624+
);
614625

615626
BinaryArray::new(offsets, non_utf8_data, None);
616627

arrow-array/src/array/byte_view_array.rs

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -324,17 +324,17 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
324324
/// Caller is responsible for ensuring that the index is within the bounds
325325
/// of the array
326326
pub unsafe fn value_unchecked(&self, idx: usize) -> &T::Native {
327-
let v = self.views.get_unchecked(idx);
327+
let v = unsafe { self.views.get_unchecked(idx) };
328328
let len = *v as u32;
329329
let b = if len <= MAX_INLINE_VIEW_LEN {
330-
Self::inline_value(v, len as usize)
330+
unsafe { Self::inline_value(v, len as usize) }
331331
} else {
332332
let view = ByteView::from(*v);
333-
let data = self.buffers.get_unchecked(view.buffer_index as usize);
333+
let data = unsafe { self.buffers.get_unchecked(view.buffer_index as usize) };
334334
let offset = view.offset as usize;
335-
data.get_unchecked(offset..offset + len as usize)
335+
unsafe { data.get_unchecked(offset..offset + len as usize) }
336336
};
337-
T::Native::from_bytes_unchecked(b)
337+
unsafe { T::Native::from_bytes_unchecked(b) }
338338
}
339339

340340
/// Returns the first `len` bytes the inline value of the view.
@@ -345,7 +345,9 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
345345
#[inline(always)]
346346
pub unsafe fn inline_value(view: &u128, len: usize) -> &[u8] {
347347
debug_assert!(len <= MAX_INLINE_VIEW_LEN as usize);
348-
std::slice::from_raw_parts((view as *const u128 as *const u8).wrapping_add(4), len)
348+
unsafe {
349+
std::slice::from_raw_parts((view as *const u128 as *const u8).wrapping_add(4), len)
350+
}
349351
}
350352

351353
/// Constructs a new iterator for iterating over the values of this array
@@ -541,7 +543,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
541543
#[inline(always)]
542544
unsafe fn copy_view_to_buffer(&self, i: usize, data_buf: &mut Vec<u8>) -> u128 {
543545
// SAFETY: `i < self.len()` ensures this is in‑bounds.
544-
let raw_view = *self.views().get_unchecked(i);
546+
let raw_view = unsafe { *self.views().get_unchecked(i) };
545547
let mut bv = ByteView::from(raw_view);
546548

547549
// Inline‑small views stay as‑is.
@@ -550,10 +552,10 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
550552
} else {
551553
// SAFETY: `bv.buffer_index` and `bv.offset..bv.offset+bv.length`
552554
// must both lie within valid ranges for `self.buffers`.
553-
let buffer = self.buffers.get_unchecked(bv.buffer_index as usize);
555+
let buffer = unsafe { self.buffers.get_unchecked(bv.buffer_index as usize) };
554556
let start = bv.offset as usize;
555557
let end = start + bv.length as usize;
556-
let slice = buffer.get_unchecked(start..end);
558+
let slice = unsafe { buffer.get_unchecked(start..end) };
557559

558560
// Copy out‑of‑line data into our single “0” buffer.
559561
let new_offset = data_buf.len() as u32;
@@ -624,10 +626,10 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
624626
right: &GenericByteViewArray<T>,
625627
right_idx: usize,
626628
) -> Ordering {
627-
let l_view = left.views().get_unchecked(left_idx);
629+
let l_view = unsafe { left.views().get_unchecked(left_idx) };
628630
let l_byte_view = ByteView::from(*l_view);
629631

630-
let r_view = right.views().get_unchecked(right_idx);
632+
let r_view = unsafe { right.views().get_unchecked(right_idx) };
631633
let r_byte_view = ByteView::from(*r_view);
632634

633635
let l_len = l_byte_view.length;
@@ -853,7 +855,7 @@ impl<'a, T: ByteViewType + ?Sized> ArrayAccessor for &'a GenericByteViewArray<T>
853855
}
854856

855857
unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
856-
GenericByteViewArray::value_unchecked(self, index)
858+
unsafe { GenericByteViewArray::value_unchecked(self, index) }
857859
}
858860
}
859861

@@ -999,7 +1001,7 @@ impl BinaryViewArray {
9991001
/// # Safety
10001002
/// Caller is responsible for ensuring that items in array are utf8 data.
10011003
pub unsafe fn to_string_view_unchecked(self) -> StringViewArray {
1002-
StringViewArray::new_unchecked(self.views, self.buffers, self.nulls)
1004+
unsafe { StringViewArray::new_unchecked(self.views, self.buffers, self.nulls) }
10031005
}
10041006
}
10051007

@@ -1171,7 +1173,10 @@ mod tests {
11711173
builder.finish()
11721174
};
11731175
assert_eq!(array.value(0), "large payload over 12 bytes");
1174-
assert_eq!(array.value(1), "another large payload over 12 bytes that double than the first one, so that we can trigger the in_progress in builder re-created");
1176+
assert_eq!(
1177+
array.value(1),
1178+
"another large payload over 12 bytes that double than the first one, so that we can trigger the in_progress in builder re-created"
1179+
);
11751180
assert_eq!(2, array.buffers.len());
11761181
}
11771182

0 commit comments

Comments
 (0)