|
21 | 21 | use std::mem; |
22 | 22 | use std::sync::Arc; |
23 | 23 |
|
24 | | -use crate::datatypes::{DataType, IntervalUnit}; |
| 24 | +use crate::{array::raw_pointer::RawPtrBox, datatypes::{DataType, IntervalUnit}}; |
25 | 25 | use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; |
26 | 26 | use crate::{ |
27 | 27 | buffer::{Buffer, MutableBuffer}, |
28 | 28 | util::bit_util, |
29 | 29 | }; |
30 | 30 |
|
31 | | -use super::equal::equal; |
| 31 | +use super::{OffsetSizeTrait, equal::equal}; |
32 | 32 |
|
33 | 33 | #[inline] |
34 | 34 | pub(crate) fn count_nulls( |
@@ -383,15 +383,55 @@ impl ArrayData { |
383 | 383 | pub fn slice(&self, offset: usize, length: usize) -> ArrayData { |
384 | 384 | assert!((offset + length) <= self.len()); |
385 | 385 |
|
386 | | - let mut new_data = self.clone(); |
387 | | - |
388 | | - new_data.len = length; |
389 | | - new_data.offset = offset + self.offset; |
390 | | - |
391 | | - new_data.null_count = |
392 | | - count_nulls(new_data.null_buffer(), new_data.offset, new_data.len); |
393 | | - |
394 | | - new_data |
| 386 | + // If data type is primitive, it's quick to clone array |
| 387 | + if self.child_data().is_empty() { |
| 388 | + let mut new_data = self.clone(); |
| 389 | + |
| 390 | + new_data.len = length; |
| 391 | + new_data.offset = offset + self.offset; |
| 392 | + |
| 393 | + new_data.null_count = |
| 394 | + count_nulls(new_data.null_buffer(), new_data.offset, new_data.len); |
| 395 | + |
| 396 | + new_data |
| 397 | + } else { |
| 398 | + // Slice into children |
| 399 | + let new_offset = self.offset + offset; |
| 400 | + let new_data = ArrayData { |
| 401 | + data_type: self.data_type().clone(), |
| 402 | + len: length, |
| 403 | + null_count: count_nulls(self.null_buffer(), new_offset, length), |
| 404 | + offset: new_offset, |
| 405 | + buffers: self.buffers.clone(), |
| 406 | + child_data: self.child_data().iter().map(|data| { |
| 407 | + match self.data_type() { |
| 408 | + DataType::List(_) => { |
| 409 | + let (start, end) = get_list_child_slice::<i32>( |
| 410 | + self.buffers.get(0).unwrap(), |
| 411 | + offset, |
| 412 | + length |
| 413 | + ); |
| 414 | + data.slice(start, end - start) |
| 415 | + } |
| 416 | + DataType::LargeList(_) => { |
| 417 | + let (start, end) = get_list_child_slice::<i64>( |
| 418 | + self.buffers.get(0).unwrap(), |
| 419 | + offset, |
| 420 | + length |
| 421 | + ); |
| 422 | + data.slice(start, end - start) |
| 423 | + } |
| 424 | + _ => { |
| 425 | + // All other types don't require computing offsets |
| 426 | + data.slice(offset, length) |
| 427 | + } |
| 428 | + } |
| 429 | + }).collect(), |
| 430 | + null_bitmap: self.null_bitmap().clone(), |
| 431 | + }; |
| 432 | + |
| 433 | + new_data |
| 434 | + } |
395 | 435 | } |
396 | 436 |
|
397 | 437 | /// Returns the `buffer` as a slice of type `T` starting at self.offset |
@@ -465,6 +505,25 @@ impl ArrayData { |
465 | 505 | } |
466 | 506 | } |
467 | 507 |
|
| 508 | +#[inline] |
| 509 | +fn get_list_child_slice<OffsetSize: OffsetSizeTrait>( |
| 510 | + buffer: &Buffer, |
| 511 | + offset: usize, |
| 512 | + length: usize |
| 513 | +) -> (usize, usize) { |
| 514 | + let raw_buffer = buffer.as_ptr(); |
| 515 | + let value_offsets: &[OffsetSize] = unsafe { |
| 516 | + let value_offsets = RawPtrBox::<OffsetSize>::new(raw_buffer); |
| 517 | + std::slice::from_raw_parts( |
| 518 | + value_offsets.as_ptr().add(offset), |
| 519 | + length + 1, |
| 520 | + ) |
| 521 | + }; |
| 522 | + let start = value_offsets[0]; |
| 523 | + let end = value_offsets[length - 1]; |
| 524 | + (start.to_usize().unwrap(), end.to_usize().unwrap()) |
| 525 | +} |
| 526 | + |
468 | 527 | impl PartialEq for ArrayData { |
469 | 528 | fn eq(&self, other: &Self) -> bool { |
470 | 529 | equal(self, other) |
|
0 commit comments