Skip to content

Commit 86ccb87

Browse files
fix largelist
1 parent 138f14d commit 86ccb87

File tree

1 file changed

+39
-23
lines changed

1 file changed

+39
-23
lines changed

datafusion/physical-expr/src/array_expressions.rs

Lines changed: 39 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ use arrow_buffer::NullBuffer;
3030

3131
use arrow_schema::FieldRef;
3232
use datafusion_common::cast::{
33-
as_generic_string_array, as_int64_array, as_list_array, as_string_array,
33+
as_generic_string_array, as_int64_array, as_large_list_array, as_list_array,
34+
as_string_array,
3435
};
3536
use datafusion_common::utils::array_into_list_array;
3637
use datafusion_common::{
@@ -1991,38 +1992,27 @@ pub fn array_intersect(args: &[ArrayRef]) -> Result<ArrayRef> {
19911992
}
19921993
}
19931994

1994-
/// array_distinct SQL function
1995-
/// example: from list [1, 3, 2, 3, 1, 2, 4] to [1, 2, 3, 4]
1996-
pub fn array_distinct(args: &[ArrayRef]) -> Result<ArrayRef> {
1997-
assert_eq!(args.len(), 1);
1998-
1999-
// handle null
2000-
if args[0].data_type() == &DataType::Null {
2001-
return Ok(args[0].clone());
2002-
}
2003-
2004-
let array = as_list_array(&args[0])?;
1995+
pub fn general_array_distinct<OffsetSize: OffsetSizeTrait>(
1996+
array: &GenericListArray<OffsetSize>,
1997+
field: &FieldRef,
1998+
) -> Result<ArrayRef> {
20051999
let dt = array.value_type();
2006-
2007-
let mut offsets = vec![0];
2000+
let mut offsets = vec![OffsetSize::usize_as(0)];
20082001
let mut new_arrays = vec![];
2009-
20102002
let converter = RowConverter::new(vec![SortField::new(dt.clone())])?;
20112003
// compute the distinct elements of each non-null list in the (Large)List array
20122004
for arr in array.iter().flatten() {
20132005
let values = converter.convert_columns(&[arr])?;
2014-
20152006
let mut rows = Vec::with_capacity(values.num_rows());
20162007
// sort elements in list and remove duplicates
20172008
for val in values.iter().sorted().dedup() {
20182009
rows.push(val);
20192010
}
2020-
2021-
let last_offset: i32 = match offsets.last().copied() {
2011+
let last_offset: OffsetSize = match offsets.last().copied() {
20222012
Some(offset) => offset,
20232013
None => return internal_err!("offsets should not be empty"),
20242014
};
2025-
offsets.push(last_offset + rows.len() as i32);
2015+
offsets.push(last_offset + OffsetSize::usize_as(rows.len()));
20262016
let arrays = converter.convert_rows(rows)?;
20272017
let array = match arrays.get(0) {
20282018
Some(array) => array.clone(),
@@ -2032,13 +2022,39 @@ pub fn array_distinct(args: &[ArrayRef]) -> Result<ArrayRef> {
20322022
};
20332023
new_arrays.push(array);
20342024
}
2035-
2036-
let field = Arc::new(Field::new("item", dt, true));
20372025
let offsets = OffsetBuffer::new(offsets.into());
20382026
let new_arrays_ref = new_arrays.iter().map(|v| v.as_ref()).collect::<Vec<_>>();
20392027
let values = compute::concat(&new_arrays_ref)?;
2040-
let arr = Arc::new(ListArray::try_new(field, offsets, values, None)?);
2041-
Ok(arr)
2028+
Ok(Arc::new(GenericListArray::<OffsetSize>::try_new(
2029+
field.clone(),
2030+
offsets,
2031+
values,
2032+
None,
2033+
)?))
2034+
}
2035+
2036+
/// `array_distinct` SQL function: removes duplicate elements from each list of a list array.
2037+
/// Example: the list [1, 3, 2, 3, 1, 2, 4] becomes [1, 2, 3, 4].
///
/// Expects exactly one argument and panics otherwise (see the `assert_eq!` below).
/// A `Null`-typed argument is returned unchanged. `List` and `LargeList` arguments
/// are handed to `general_array_distinct` together with the list's element field.
/// Any other data type produces an internal error.
2038+
pub fn array_distinct(args: &[ArrayRef]) -> Result<ArrayRef> {
2039+
assert_eq!(args.len(), 1);
2040+
2041+
// A Null-typed input has nothing to deduplicate; return it as-is.
2042+
if args[0].data_type() == &DataType::Null {
2043+
return Ok(args[0].clone());
2044+
}
2045+
2046+
// Dispatch on the list flavor: the same generic helper serves both List and LargeList.
2047+
match args[0].data_type() {
2048+
DataType::List(field) => {
2049+
let array = as_list_array(&args[0])?;
2050+
general_array_distinct(array, field)
2051+
}
2052+
DataType::LargeList(field) => {
2053+
let array = as_large_list_array(&args[0])?;
2054+
general_array_distinct(array, field)
2055+
}
2056+
_ => internal_err!("array_distinct only support list array"),
2057+
}
20422058
}
20432059

20442060
#[cfg(test)]

0 commit comments

Comments
 (0)