@@ -30,7 +30,8 @@ use arrow_buffer::NullBuffer;
30
30
31
31
use arrow_schema:: FieldRef ;
32
32
use datafusion_common:: cast:: {
33
- as_generic_string_array, as_int64_array, as_list_array, as_string_array,
33
+ as_generic_string_array, as_int64_array, as_large_list_array, as_list_array,
34
+ as_string_array,
34
35
} ;
35
36
use datafusion_common:: utils:: array_into_list_array;
36
37
use datafusion_common:: {
@@ -1991,38 +1992,27 @@ pub fn array_intersect(args: &[ArrayRef]) -> Result<ArrayRef> {
1991
1992
}
1992
1993
}
1993
1994
1994
- /// array_distinct SQL function
1995
- /// example: from list [1, 3, 2, 3, 1, 2, 4] to [1, 2, 3, 4]
1996
- pub fn array_distinct ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
1997
- assert_eq ! ( args. len( ) , 1 ) ;
1998
-
1999
- // handle null
2000
- if args[ 0 ] . data_type ( ) == & DataType :: Null {
2001
- return Ok ( args[ 0 ] . clone ( ) ) ;
2002
- }
2003
-
2004
- let array = as_list_array ( & args[ 0 ] ) ?;
1995
+ pub fn general_array_distinct < OffsetSize : OffsetSizeTrait > (
1996
+ array : & GenericListArray < OffsetSize > ,
1997
+ field : & FieldRef ,
1998
+ ) -> Result < ArrayRef > {
2005
1999
let dt = array. value_type ( ) ;
2006
-
2007
- let mut offsets = vec ! [ 0 ] ;
2000
+ let mut offsets = vec ! [ OffsetSize :: usize_as( 0 ) ] ;
2008
2001
let mut new_arrays = vec ! [ ] ;
2009
-
2010
2002
let converter = RowConverter :: new ( vec ! [ SortField :: new( dt. clone( ) ) ] ) ?;
2011
2003
// distinct for each list in ListArray
2012
2004
for arr in array. iter ( ) . flatten ( ) {
2013
2005
let values = converter. convert_columns ( & [ arr] ) ?;
2014
-
2015
2006
let mut rows = Vec :: with_capacity ( values. num_rows ( ) ) ;
2016
2007
// sort elements in list and remove duplicates
2017
2008
for val in values. iter ( ) . sorted ( ) . dedup ( ) {
2018
2009
rows. push ( val) ;
2019
2010
}
2020
-
2021
- let last_offset: i32 = match offsets. last ( ) . copied ( ) {
2011
+ let last_offset: OffsetSize = match offsets. last ( ) . copied ( ) {
2022
2012
Some ( offset) => offset,
2023
2013
None => return internal_err ! ( "offsets should not be empty" ) ,
2024
2014
} ;
2025
- offsets. push ( last_offset + rows. len ( ) as i32 ) ;
2015
+ offsets. push ( last_offset + OffsetSize :: usize_as ( rows. len ( ) ) ) ;
2026
2016
let arrays = converter. convert_rows ( rows) ?;
2027
2017
let array = match arrays. get ( 0 ) {
2028
2018
Some ( array) => array. clone ( ) ,
@@ -2032,13 +2022,39 @@ pub fn array_distinct(args: &[ArrayRef]) -> Result<ArrayRef> {
2032
2022
} ;
2033
2023
new_arrays. push ( array) ;
2034
2024
}
2035
-
2036
- let field = Arc :: new ( Field :: new ( "item" , dt, true ) ) ;
2037
2025
let offsets = OffsetBuffer :: new ( offsets. into ( ) ) ;
2038
2026
let new_arrays_ref = new_arrays. iter ( ) . map ( |v| v. as_ref ( ) ) . collect :: < Vec < _ > > ( ) ;
2039
2027
let values = compute:: concat ( & new_arrays_ref) ?;
2040
- let arr = Arc :: new ( ListArray :: try_new ( field, offsets, values, None ) ?) ;
2041
- Ok ( arr)
2028
+ Ok ( Arc :: new ( GenericListArray :: < OffsetSize > :: try_new (
2029
+ field. clone ( ) ,
2030
+ offsets,
2031
+ values,
2032
+ None ,
2033
+ ) ?) )
2034
+ }
2035
+
2036
+ /// array_distinct SQL function
2037
+ /// example: from list [1, 3, 2, 3, 1, 2, 4] to [1, 2, 3, 4]
2038
+ pub fn array_distinct ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
2039
+ assert_eq ! ( args. len( ) , 1 ) ;
2040
+
2041
+ // handle null
2042
+ if args[ 0 ] . data_type ( ) == & DataType :: Null {
2043
+ return Ok ( args[ 0 ] . clone ( ) ) ;
2044
+ }
2045
+
2046
+ // handle for list & largelist
2047
+ match args[ 0 ] . data_type ( ) {
2048
+ DataType :: List ( field) => {
2049
+ let array = as_list_array ( & args[ 0 ] ) ?;
2050
+ general_array_distinct ( array, field)
2051
+ }
2052
+ DataType :: LargeList ( field) => {
2053
+ let array = as_large_list_array ( & args[ 0 ] ) ?;
2054
+ general_array_distinct ( array, field)
2055
+ }
2056
+ _ => internal_err ! ( "array_distinct only support list array" ) ,
2057
+ }
2042
2058
}
2043
2059
2044
2060
#[ cfg( test) ]
0 commit comments