Skip to content

Commit 0a2649f

Browse files
committed
make slice work for nested types
1 parent f41cb17 commit 0a2649f

File tree

2 files changed

+71
-17
lines changed

2 files changed

+71
-17
lines changed

arrow/src/array/array_struct.rs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,7 @@ impl From<ArrayData> for StructArray {
8585
fn from(data: ArrayData) -> Self {
8686
let mut boxed_fields = vec![];
8787
for cd in data.child_data() {
88-
let child_data = if data.offset() != 0 || data.len() != cd.len() {
89-
cd.slice(data.offset(), data.len())
90-
} else {
91-
cd.clone()
92-
};
93-
boxed_fields.push(make_array(child_data));
88+
boxed_fields.push(make_array(cd.clone()));
9489
}
9590
Self { data, boxed_fields }
9691
}

arrow/src/array/data.rs

Lines changed: 70 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@
2121
use std::mem;
2222
use std::sync::Arc;
2323

24-
use crate::datatypes::{DataType, IntervalUnit};
24+
use crate::{array::raw_pointer::RawPtrBox, datatypes::{DataType, IntervalUnit}};
2525
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
2626
use crate::{
2727
buffer::{Buffer, MutableBuffer},
2828
util::bit_util,
2929
};
3030

31-
use super::equal::equal;
31+
use super::{OffsetSizeTrait, equal::equal};
3232

3333
#[inline]
3434
pub(crate) fn count_nulls(
@@ -385,15 +385,55 @@ impl ArrayData {
385385
pub fn slice(&self, offset: usize, length: usize) -> ArrayData {
386386
assert!((offset + length) <= self.len());
387387

388-
let mut new_data = self.clone();
389-
390-
new_data.len = length;
391-
new_data.offset = offset + self.offset;
392-
393-
new_data.null_count =
394-
count_nulls(new_data.null_buffer(), new_data.offset, new_data.len);
395-
396-
new_data
388+
// Arrays without child data (e.g. primitive types) only need their offset, length and null count adjusted
389+
if self.child_data().is_empty() {
390+
let mut new_data = self.clone();
391+
392+
new_data.len = length;
393+
new_data.offset = offset + self.offset;
394+
395+
new_data.null_count =
396+
count_nulls(new_data.null_buffer(), new_data.offset, new_data.len);
397+
398+
new_data
399+
} else {
400+
// Slice into children
401+
let new_offset = self.offset + offset;
402+
let new_data = ArrayData {
403+
data_type: self.data_type().clone(),
404+
len: length,
405+
null_count: count_nulls(self.null_buffer(), new_offset, length),
406+
offset: new_offset,
407+
buffers: self.buffers.clone(),
408+
child_data: self.child_data().iter().map(|data| {
409+
match self.data_type() {
410+
DataType::List(_) => {
411+
let (start, end) = get_list_child_slice::<i32>(
412+
self.buffers.get(0).unwrap(),
413+
offset,
414+
length
415+
);
416+
data.slice(start, end - start)
417+
}
418+
DataType::LargeList(_) => {
419+
let (start, end) = get_list_child_slice::<i64>(
420+
self.buffers.get(0).unwrap(),
421+
offset,
422+
length
423+
);
424+
data.slice(start, end - start)
425+
}
426+
_ => {
427+
// All other types don't require computing offsets
428+
data.slice(offset, length)
429+
}
430+
}
431+
}).collect(),
432+
null_bitmap: self.null_bitmap().clone(),
433+
};
434+
435+
new_data
436+
}
397437
}
398438

399439
/// Returns the `buffer` as a slice of type `T` starting at self.offset
@@ -467,6 +507,25 @@ impl ArrayData {
467507
}
468508
}
469509

510+
#[inline]
511+
fn get_list_child_slice<OffsetSize: OffsetSizeTrait>(
512+
buffer: &Buffer,
513+
offset: usize,
514+
length: usize
515+
) -> (usize, usize) {
516+
let raw_buffer = buffer.as_ptr();
517+
let value_offsets: &[OffsetSize] = unsafe {
518+
let value_offsets = RawPtrBox::<OffsetSize>::new(raw_buffer);
519+
std::slice::from_raw_parts(
520+
value_offsets.as_ptr().add(offset),
521+
length + 1,
522+
)
523+
};
524+
let start = value_offsets[0];
525+
let end = value_offsets[length - 1];
526+
(start.to_usize().unwrap(), end.to_usize().unwrap())
527+
}
528+
470529
impl PartialEq for ArrayData {
471530
fn eq(&self, other: &Self) -> bool {
472531
equal(self, other)

0 commit comments

Comments
 (0)