|
21 | 21 | use std::mem; |
22 | 22 | use std::sync::Arc; |
23 | 23 |
|
24 | | -use crate::datatypes::{DataType, IntervalUnit}; |
| 24 | +use crate::{array::raw_pointer::RawPtrBox, datatypes::{DataType, IntervalUnit}}; |
25 | 25 | use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; |
26 | 26 | use crate::{ |
27 | 27 | buffer::{Buffer, MutableBuffer}, |
28 | 28 | util::bit_util, |
29 | 29 | }; |
30 | 30 |
|
31 | | -use super::equal::equal; |
| 31 | +use super::{OffsetSizeTrait, equal::equal}; |
32 | 32 |
|
33 | 33 | #[inline] |
34 | 34 | pub(crate) fn count_nulls( |
@@ -385,15 +385,55 @@ impl ArrayData { |
385 | 385 | pub fn slice(&self, offset: usize, length: usize) -> ArrayData { |
386 | 386 | assert!((offset + length) <= self.len()); |
387 | 387 |
|
388 | | - let mut new_data = self.clone(); |
389 | | - |
390 | | - new_data.len = length; |
391 | | - new_data.offset = offset + self.offset; |
392 | | - |
393 | | - new_data.null_count = |
394 | | - count_nulls(new_data.null_buffer(), new_data.offset, new_data.len); |
395 | | - |
396 | | - new_data |
| 388 | + // If data type is primitive, it's quick to clone array |
| 389 | + if self.child_data().is_empty() { |
| 390 | + let mut new_data = self.clone(); |
| 391 | + |
| 392 | + new_data.len = length; |
| 393 | + new_data.offset = offset + self.offset; |
| 394 | + |
| 395 | + new_data.null_count = |
| 396 | + count_nulls(new_data.null_buffer(), new_data.offset, new_data.len); |
| 397 | + |
| 398 | + new_data |
| 399 | + } else { |
| 400 | + // Slice into children |
| 401 | + let new_offset = self.offset + offset; |
| 402 | + let new_data = ArrayData { |
| 403 | + data_type: self.data_type().clone(), |
| 404 | + len: length, |
| 405 | + null_count: count_nulls(self.null_buffer(), new_offset, length), |
| 406 | + offset: new_offset, |
| 407 | + buffers: self.buffers.clone(), |
| 408 | + child_data: self.child_data().iter().map(|data| { |
| 409 | + match self.data_type() { |
| 410 | + DataType::List(_) => { |
| 411 | + let (start, end) = get_list_child_slice::<i32>( |
| 412 | + self.buffers.get(0).unwrap(), |
| 413 | + offset, |
| 414 | + length |
| 415 | + ); |
| 416 | + data.slice(start, end - start) |
| 417 | + } |
| 418 | + DataType::LargeList(_) => { |
| 419 | + let (start, end) = get_list_child_slice::<i64>( |
| 420 | + self.buffers.get(0).unwrap(), |
| 421 | + offset, |
| 422 | + length |
| 423 | + ); |
| 424 | + data.slice(start, end - start) |
| 425 | + } |
| 426 | + _ => { |
| 427 | + // All other types don't require computing offsets |
| 428 | + data.slice(offset, length) |
| 429 | + } |
| 430 | + } |
| 431 | + }).collect(), |
| 432 | + null_bitmap: self.null_bitmap().clone(), |
| 433 | + }; |
| 434 | + |
| 435 | + new_data |
| 436 | + } |
397 | 437 | } |
398 | 438 |
|
399 | 439 | /// Returns the `buffer` as a slice of type `T` starting at self.offset |
@@ -467,6 +507,25 @@ impl ArrayData { |
467 | 507 | } |
468 | 508 | } |
469 | 509 |
|
| 510 | +#[inline] |
| 511 | +fn get_list_child_slice<OffsetSize: OffsetSizeTrait>( |
| 512 | + buffer: &Buffer, |
| 513 | + offset: usize, |
| 514 | + length: usize |
| 515 | +) -> (usize, usize) { |
| 516 | + let raw_buffer = buffer.as_ptr(); |
| 517 | + let value_offsets: &[OffsetSize] = unsafe { |
| 518 | + let value_offsets = RawPtrBox::<OffsetSize>::new(raw_buffer); |
| 519 | + std::slice::from_raw_parts( |
| 520 | + value_offsets.as_ptr().add(offset), |
| 521 | + length + 1, |
| 522 | + ) |
| 523 | + }; |
| 524 | + let start = value_offsets[0]; |
| 525 | + let end = value_offsets[length - 1]; |
| 526 | + (start.to_usize().unwrap(), end.to_usize().unwrap()) |
| 527 | +} |
| 528 | + |
470 | 529 | impl PartialEq for ArrayData { |
471 | 530 | fn eq(&self, other: &Self) -> bool { |
472 | 531 | equal(self, other) |
|
0 commit comments