Skip to content

Commit fc6cc48

Browse files
authored
feat: support largelist in array_slice (#8561)
* support largelist in array_slice * remove T trait * fix clippy
1 parent 2e16c75 commit fc6cc48

File tree

2 files changed

+208
-31
lines changed

2 files changed

+208
-31
lines changed

datafusion/physical-expr/src/array_expressions.rs

Lines changed: 79 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -524,11 +524,33 @@ pub fn array_except(args: &[ArrayRef]) -> Result<ArrayRef> {
524524
///
525525
/// See test cases in `array.slt` for more details.
526526
pub fn array_slice(args: &[ArrayRef]) -> Result<ArrayRef> {
527-
let list_array = as_list_array(&args[0])?;
528-
let from_array = as_int64_array(&args[1])?;
529-
let to_array = as_int64_array(&args[2])?;
527+
let array_data_type = args[0].data_type();
528+
match array_data_type {
529+
DataType::List(_) => {
530+
let array = as_list_array(&args[0])?;
531+
let from_array = as_int64_array(&args[1])?;
532+
let to_array = as_int64_array(&args[2])?;
533+
general_array_slice::<i32>(array, from_array, to_array)
534+
}
535+
DataType::LargeList(_) => {
536+
let array = as_large_list_array(&args[0])?;
537+
let from_array = as_int64_array(&args[1])?;
538+
let to_array = as_int64_array(&args[2])?;
539+
general_array_slice::<i64>(array, from_array, to_array)
540+
}
541+
_ => not_impl_err!("array_slice does not support type: {:?}", array_data_type),
542+
}
543+
}
530544

531-
let values = list_array.values();
545+
fn general_array_slice<O: OffsetSizeTrait>(
546+
array: &GenericListArray<O>,
547+
from_array: &Int64Array,
548+
to_array: &Int64Array,
549+
) -> Result<ArrayRef>
550+
where
551+
i64: TryInto<O>,
552+
{
553+
let values = array.values();
532554
let original_data = values.to_data();
533555
let capacity = Capacities::Array(original_data.len());
534556

@@ -539,72 +561,98 @@ pub fn array_slice(args: &[ArrayRef]) -> Result<ArrayRef> {
539561
// The slice syntax is compatible with DuckDB v0.8.1.
540562
// `adjusted_from_index` and `adjusted_to_index` follow the rules of array_slice in DuckDB.
541563

542-
fn adjusted_from_index(index: i64, len: usize) -> Option<i64> {
564+
fn adjusted_from_index<O: OffsetSizeTrait>(index: i64, len: O) -> Result<Option<O>>
565+
where
566+
i64: TryInto<O>,
567+
{
543568
// 0 ~ len - 1
544569
let adjusted_zero_index = if index < 0 {
545-
index + len as i64
570+
if let Ok(index) = index.try_into() {
571+
index + len
572+
} else {
573+
return exec_err!("array_slice got invalid index: {}", index);
574+
}
546575
} else {
547576
// array_slice(arr, 1, to) is the same as array_slice(arr, 0, to)
548-
std::cmp::max(index - 1, 0)
577+
if let Ok(index) = index.try_into() {
578+
std::cmp::max(index - O::usize_as(1), O::usize_as(0))
579+
} else {
580+
return exec_err!("array_slice got invalid index: {}", index);
581+
}
549582
};
550583

551-
if 0 <= adjusted_zero_index && adjusted_zero_index < len as i64 {
552-
Some(adjusted_zero_index)
584+
if O::usize_as(0) <= adjusted_zero_index && adjusted_zero_index < len {
585+
Ok(Some(adjusted_zero_index))
553586
} else {
554587
// Out of bounds
555-
None
588+
Ok(None)
556589
}
557590
}
558591

559-
fn adjusted_to_index(index: i64, len: usize) -> Option<i64> {
592+
fn adjusted_to_index<O: OffsetSizeTrait>(index: i64, len: O) -> Result<Option<O>>
593+
where
594+
i64: TryInto<O>,
595+
{
560596
// 0 ~ len - 1
561597
let adjusted_zero_index = if index < 0 {
562598
// In DuckDB, array_slice with a negative to_index is Python-like, so the index itself is exclusive.
563-
index + len as i64 - 1
599+
if let Ok(index) = index.try_into() {
600+
index + len - O::usize_as(1)
601+
} else {
602+
return exec_err!("array_slice got invalid index: {}", index);
603+
}
564604
} else {
565605
// array_slice(arr, from, len + 1) is the same as array_slice(arr, from, len)
566-
std::cmp::min(index - 1, len as i64 - 1)
606+
if let Ok(index) = index.try_into() {
607+
std::cmp::min(index - O::usize_as(1), len - O::usize_as(1))
608+
} else {
609+
return exec_err!("array_slice got invalid index: {}", index);
610+
}
567611
};
568612

569-
if 0 <= adjusted_zero_index && adjusted_zero_index < len as i64 {
570-
Some(adjusted_zero_index)
613+
if O::usize_as(0) <= adjusted_zero_index && adjusted_zero_index < len {
614+
Ok(Some(adjusted_zero_index))
571615
} else {
572616
// Out of bounds
573-
None
617+
Ok(None)
574618
}
575619
}
576620

577-
let mut offsets = vec![0];
621+
let mut offsets = vec![O::usize_as(0)];
578622

579-
for (row_index, offset_window) in list_array.offsets().windows(2).enumerate() {
580-
let start = offset_window[0] as usize;
581-
let end = offset_window[1] as usize;
623+
for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
624+
let start = offset_window[0];
625+
let end = offset_window[1];
582626
let len = end - start;
583627

584628
// A len of 0 indicates the array is null; return an empty array for this row.
585-
if len == 0 {
629+
if len == O::usize_as(0) {
586630
offsets.push(offsets[row_index]);
587631
continue;
588632
}
589633

590634
// If index is null, we consider it as the minimum / maximum index of the array.
591635
let from_index = if from_array.is_null(row_index) {
592-
Some(0)
636+
Some(O::usize_as(0))
593637
} else {
594-
adjusted_from_index(from_array.value(row_index), len)
638+
adjusted_from_index::<O>(from_array.value(row_index), len)?
595639
};
596640

597641
let to_index = if to_array.is_null(row_index) {
598-
Some(len as i64 - 1)
642+
Some(len - O::usize_as(1))
599643
} else {
600-
adjusted_to_index(to_array.value(row_index), len)
644+
adjusted_to_index::<O>(to_array.value(row_index), len)?
601645
};
602646

603647
if let (Some(from), Some(to)) = (from_index, to_index) {
604648
if from <= to {
605-
assert!(start + to as usize <= end);
606-
mutable.extend(0, start + from as usize, start + to as usize + 1);
607-
offsets.push(offsets[row_index] + (to - from + 1) as i32);
649+
assert!(start + to <= end);
650+
mutable.extend(
651+
0,
652+
(start + from).to_usize().unwrap(),
653+
(start + to + O::usize_as(1)).to_usize().unwrap(),
654+
);
655+
offsets.push(offsets[row_index] + (to - from + O::usize_as(1)));
608656
} else {
609657
// invalid range, return empty array
610658
offsets.push(offsets[row_index]);
@@ -617,9 +665,9 @@ pub fn array_slice(args: &[ArrayRef]) -> Result<ArrayRef> {
617665

618666
let data = mutable.freeze();
619667

620-
Ok(Arc::new(ListArray::try_new(
621-
Arc::new(Field::new("item", list_array.value_type(), true)),
622-
OffsetBuffer::new(offsets.into()),
668+
Ok(Arc::new(GenericListArray::<O>::try_new(
669+
Arc::new(Field::new("item", array.value_type(), true)),
670+
OffsetBuffer::<O>::new(offsets.into()),
623671
arrow_array::make_array(data),
624672
None,
625673
)?))

0 commit comments

Comments
 (0)