Skip to content

Commit 44916a1

Browse files
committed
Handle more FixedSizeLists
1 parent 79bc1bf commit 44916a1

File tree

13 files changed

+138
-148
lines changed

13 files changed

+138
-148
lines changed

datafusion/expr-common/src/signature.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -885,12 +885,16 @@ impl Signature {
885885
}
886886

887887
/// Specialized [Signature] for functions that take a fixed number of arrays.
888-
pub fn arrays(n: usize, volatility: Volatility) -> Self {
888+
pub fn arrays(
889+
n: usize,
890+
coercion: Option<ListCoercion>,
891+
volatility: Volatility,
892+
) -> Self {
889893
Signature {
890894
type_signature: TypeSignature::ArraySignature(
891895
ArrayFunctionSignature::Array {
892896
arguments: vec![ArrayFunctionArgument::Array; n],
893-
array_coercion: Some(ListCoercion::FixedSizedListToList),
897+
array_coercion: coercion,
894898
},
895899
),
896900
volatility,
@@ -939,7 +943,7 @@ impl Signature {
939943

940944
/// Specialized [Signature] for ArrayEmpty and similar functions.
941945
pub fn array(volatility: Volatility) -> Self {
942-
Signature::arrays(1, volatility)
946+
Signature::arrays(1, Some(ListCoercion::FixedSizedListToList), volatility)
943947
}
944948
}
945949

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -391,9 +391,9 @@ fn get_valid_types(
391391
element_types.push(field.data_type().clone());
392392
list_sizes.push(*size)
393393
}
394-
arg_type => plan_err!(
395-
"{function_name} does not support an argument of type {arg_type}"
396-
)?,
394+
arg_type => {
395+
plan_err!("{function_name} does not support type {arg_type}")?
396+
}
397397
},
398398
}
399399
}
@@ -1167,7 +1167,11 @@ mod tests {
11671167
#[test]
11681168
fn test_get_valid_types_array_and_array() -> Result<()> {
11691169
let function = "array_and_array";
1170-
let signature = Signature::arrays(2, Volatility::Immutable);
1170+
let signature = Signature::arrays(
1171+
2,
1172+
Some(ListCoercion::FixedSizedListToList),
1173+
Volatility::Immutable,
1174+
);
11711175

11721176
let data_types = vec![
11731177
DataType::new_list(DataType::Int32, true),
@@ -1273,17 +1277,14 @@ mod tests {
12731277
#[test]
12741278
fn test_get_valid_types_fixed_size_arrays() -> Result<()> {
12751279
let function = "fixed_size_arrays";
1276-
let signature = TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
1277-
arguments: vec![ArrayFunctionArgument::Array; 2],
1278-
array_coercion: None,
1279-
});
1280+
let signature = Signature::arrays(2, None, Volatility::Immutable);
12801281

12811282
let data_types = vec![
12821283
DataType::new_fixed_size_list(DataType::Int64, 3, true),
12831284
DataType::new_fixed_size_list(DataType::Int32, 5, true),
12841285
];
12851286
assert_eq!(
1286-
get_valid_types(function, &signature, &data_types)?,
1287+
get_valid_types(function, &signature.type_signature, &data_types)?,
12871288
vec![vec![
12881289
DataType::new_fixed_size_list(DataType::Int64, 3, true),
12891290
DataType::new_fixed_size_list(DataType::Int64, 5, true),
@@ -1295,7 +1296,7 @@ mod tests {
12951296
DataType::new_list(DataType::Int32, true),
12961297
];
12971298
assert_eq!(
1298-
get_valid_types(function, &signature, &data_types)?,
1299+
get_valid_types(function, &signature.type_signature, &data_types)?,
12991300
vec![vec![
13001301
DataType::new_list(DataType::Int64, true),
13011302
DataType::new_list(DataType::Int64, true),
@@ -1307,7 +1308,7 @@ mod tests {
13071308
DataType::new_list(DataType::new_list(DataType::Int32, true), true),
13081309
];
13091310
assert_eq!(
1310-
get_valid_types(function, &signature, &data_types)?,
1311+
get_valid_types(function, &signature.type_signature, &data_types)?,
13111312
vec![vec![]]
13121313
);
13131314

datafusion/functions-nested/src/cardinality.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ pub fn cardinality_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
141141
generic_map_cardinality(map_array)
142142
}
143143
arg_type => {
144-
exec_err!("cardinality does not support an argument of type '{arg_type}'")
144+
exec_err!("cardinality does not support type {arg_type}")
145145
}
146146
}
147147
}

datafusion/functions-nested/src/concat.rs

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -296,10 +296,7 @@ impl ScalarUDFImpl for ArrayConcat {
296296
DataType::Null | DataType::List(_) | DataType::FixedSizeList(..) => (),
297297
DataType::LargeList(_) => large_list = true,
298298
arg_type => {
299-
return plan_err!(
300-
"{} does not support an argument of type {arg_type}",
301-
self.name()
302-
)
299+
return plan_err!("{} does not support type {arg_type}", self.name())
303300
}
304301
}
305302

@@ -446,28 +443,22 @@ fn concat_internal<O: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
446443
/// Array_append SQL function
447444
pub(crate) fn array_append_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
448445
let [array, values] = take_function_args("array_append", args)?;
449-
450446
match array.data_type() {
451447
DataType::Null => make_array_inner(&[Arc::clone(values)]),
452448
DataType::List(_) => general_append_and_prepend::<i32>(args, true),
453449
DataType::LargeList(_) => general_append_and_prepend::<i64>(args, true),
454-
arg_type => {
455-
exec_err!("array_append does not support an argument of type {arg_type}")
456-
}
450+
arg_type => exec_err!("array_append does not support type {arg_type}"),
457451
}
458452
}
459453

460454
/// Array_prepend SQL function
461455
pub(crate) fn array_prepend_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
462456
let [values, array] = take_function_args("array_prepend", args)?;
463-
464457
match array.data_type() {
465458
DataType::Null => make_array_inner(&[Arc::clone(values)]),
466459
DataType::List(_) => general_append_and_prepend::<i32>(args, false),
467460
DataType::LargeList(_) => general_append_and_prepend::<i64>(args, false),
468-
arg_type => {
469-
exec_err!("array_prepend does not support an argument of type {arg_type}")
470-
}
461+
arg_type => exec_err!("array_prepend does not support type {arg_type}"),
471462
}
472463
}
473464

datafusion/functions-nested/src/dimension.rs

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,17 @@
1717

1818
//! [`ScalarUDFImpl`] definitions for array_dims and array_ndims functions.
1919
20-
use arrow::array::{
21-
Array, ArrayRef, GenericListArray, ListArray, OffsetSizeTrait, UInt64Array,
22-
};
20+
use arrow::array::{Array, ArrayRef, ListArray, UInt64Array};
2321
use arrow::datatypes::{
2422
DataType,
25-
DataType::{LargeList, List, Null, UInt64},
23+
DataType::{FixedSizeList, LargeList, List, Null, UInt64},
2624
UInt64Type,
2725
};
2826
use std::any::Any;
2927

30-
use datafusion_common::cast::{as_large_list_array, as_list_array};
28+
use datafusion_common::cast::{
29+
as_fixed_size_list_array, as_large_list_array, as_list_array,
30+
};
3131
use datafusion_common::{exec_err, utils::take_function_args, Result};
3232

3333
use crate::utils::{compute_array_dims, make_scalar_function};
@@ -36,6 +36,7 @@ use datafusion_expr::{
3636
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
3737
};
3838
use datafusion_macros::user_doc;
39+
use itertools::Itertools;
3940
use std::sync::Arc;
4041

4142
make_udf_expr_and_func!(
@@ -78,7 +79,7 @@ impl Default for ArrayDims {
7879
impl ArrayDims {
7980
pub fn new() -> Self {
8081
Self {
81-
signature: Signature::array(Volatility::Immutable),
82+
signature: Signature::arrays(1, None, Volatility::Immutable),
8283
aliases: vec!["list_dims".to_string()],
8384
}
8485
}
@@ -150,7 +151,7 @@ pub(super) struct ArrayNdims {
150151
impl ArrayNdims {
151152
pub fn new() -> Self {
152153
Self {
153-
signature: Signature::array(Volatility::Immutable),
154+
signature: Signature::arrays(1, None, Volatility::Immutable),
154155
aliases: vec![String::from("list_ndims")],
155156
}
156157
}
@@ -191,20 +192,21 @@ impl ScalarUDFImpl for ArrayNdims {
191192
/// Array_dims SQL function
192193
pub fn array_dims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
193194
let [array] = take_function_args("array_dims", args)?;
194-
195-
let data = match array.data_type() {
195+
let data: Vec<_> = match array.data_type() {
196196
List(_) => as_list_array(&array)?
197197
.iter()
198198
.map(compute_array_dims)
199-
.collect::<Result<Vec<_>>>()?,
199+
.try_collect()?,
200200
LargeList(_) => as_large_list_array(&array)?
201201
.iter()
202202
.map(compute_array_dims)
203-
.collect::<Result<Vec<_>>>()?,
203+
.try_collect()?,
204+
FixedSizeList(..) => as_fixed_size_list_array(&array)?
205+
.iter()
206+
.map(compute_array_dims)
207+
.try_collect()?,
204208
arg_type => {
205-
return exec_err!(
206-
"array_dims does not support an argument of type {arg_type}"
207-
);
209+
return exec_err!("array_dims does not support type {arg_type}");
208210
}
209211
};
210212

@@ -216,9 +218,7 @@ pub fn array_dims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
216218
pub fn array_ndims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
217219
let [array] = take_function_args("array_ndims", args)?;
218220

219-
fn general_list_ndims<O: OffsetSizeTrait>(
220-
array: &GenericListArray<O>,
221-
) -> Result<ArrayRef> {
221+
fn general_list_ndims(array: &ArrayRef) -> Result<ArrayRef> {
222222
let ndims = list_ndims(array.data_type());
223223
let data = vec![ndims; array.len()];
224224
let result = UInt64Array::new(data.into(), array.nulls().cloned());
@@ -227,16 +227,7 @@ pub fn array_ndims_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
227227

228228
match array.data_type() {
229229
Null => Ok(Arc::new(UInt64Array::new_null(array.len()))),
230-
List(_) => {
231-
let array = as_list_array(array)?;
232-
general_list_ndims::<i32>(array)
233-
}
234-
LargeList(_) => {
235-
let array = as_large_list_array(array)?;
236-
general_list_ndims::<i64>(array)
237-
}
238-
arg_type => {
239-
exec_err!("array_ndims does not support an argument of type type {arg_type}")
240-
}
230+
List(_) | LargeList(_) | FixedSizeList(..) => general_list_ndims(array),
231+
arg_type => exec_err!("array_ndims does not support type {arg_type}"),
241232
}
242233
}

datafusion/functions-nested/src/distance.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,7 @@ impl ScalarUDFImpl for ArrayDistance {
120120
coercion,
121121
))
122122
} else {
123-
plan_err!(
124-
"{} does not support an argument of type {arg_type}",
125-
self.name()
126-
)
123+
plan_err!("{} does not support type {arg_type}", self.name())
127124
}
128125
});
129126

@@ -148,12 +145,11 @@ impl ScalarUDFImpl for ArrayDistance {
148145

149146
pub fn array_distance_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
150147
let [array1, array2] = take_function_args("array_distance", args)?;
151-
152148
match (array1.data_type(), array2.data_type()) {
153149
(List(_), List(_)) => general_array_distance::<i32>(args),
154150
(LargeList(_), LargeList(_)) => general_array_distance::<i64>(args),
155151
(arg_type1, arg_type2) => {
156-
exec_err!("array_distance does not support arguments of type {arg_type1} and {arg_type2:?}")
152+
exec_err!("array_distance does not support types {arg_type1} and {arg_type2}")
157153
}
158154
}
159155
}

datafusion/functions-nested/src/empty.rs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,14 @@
1818
//! [`ScalarUDFImpl`] definitions for array_empty function.
1919
2020
use crate::utils::make_scalar_function;
21-
use arrow::array::{ArrayRef, BooleanArray, OffsetSizeTrait};
21+
use arrow::array::{Array, ArrayRef, BooleanArray, OffsetSizeTrait};
22+
use arrow::buffer::BooleanBuffer;
2223
use arrow::datatypes::{
2324
DataType,
2425
DataType::{Boolean, FixedSizeList, LargeList, List},
2526
};
2627
use datafusion_common::cast::as_generic_list_array;
27-
use datafusion_common::{exec_err, plan_err, utils::take_function_args, Result};
28+
use datafusion_common::{exec_err, utils::take_function_args, Result};
2829
use datafusion_expr::{
2930
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
3031
};
@@ -71,7 +72,7 @@ impl Default for ArrayEmpty {
7172
impl ArrayEmpty {
7273
pub fn new() -> Self {
7374
Self {
74-
signature: Signature::array(Volatility::Immutable),
75+
signature: Signature::arrays(1, None, Volatility::Immutable),
7576
aliases: vec!["array_empty".to_string(), "list_empty".to_string()],
7677
}
7778
}
@@ -89,13 +90,8 @@ impl ScalarUDFImpl for ArrayEmpty {
8990
&self.signature
9091
}
9192

92-
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
93-
Ok(match arg_types[0] {
94-
List(_) | LargeList(_) | FixedSizeList(_, _) => Boolean,
95-
_ => {
96-
return plan_err!("The array_empty function can only accept List/LargeList/FixedSizeList.");
97-
}
98-
})
93+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
94+
Ok(Boolean)
9995
}
10096

10197
fn invoke_with_args(
@@ -117,21 +113,25 @@ impl ScalarUDFImpl for ArrayEmpty {
117113
/// Array_empty SQL function
118114
pub fn array_empty_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
119115
let [array] = take_function_args("array_empty", args)?;
120-
121-
let array_type = array.data_type();
122-
match array_type {
116+
match array.data_type() {
123117
List(_) => general_array_empty::<i32>(array),
124118
LargeList(_) => general_array_empty::<i64>(array),
125-
_ => exec_err!("array_empty does not support type '{array_type:?}'."),
119+
FixedSizeList(_, size) => {
120+
let values = if *size == 0 {
121+
BooleanBuffer::new_set(array.len())
122+
} else {
123+
BooleanBuffer::new_unset(array.len())
124+
};
125+
Ok(Arc::new(BooleanArray::new(values, array.nulls().cloned())))
126+
}
127+
arg_type => exec_err!("array_empty does not support type {arg_type}"),
126128
}
127129
}
128130

129131
fn general_array_empty<O: OffsetSizeTrait>(array: &ArrayRef) -> Result<ArrayRef> {
130-
let array = as_generic_list_array::<O>(array)?;
131-
132-
let builder = array
132+
let result = as_generic_list_array::<O>(array)?
133133
.iter()
134134
.map(|arr| arr.map(|arr| arr.is_empty()))
135135
.collect::<BooleanArray>();
136-
Ok(Arc::new(builder))
136+
Ok(Arc::new(result))
137137
}

datafusion/functions-nested/src/extract.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,7 @@ impl ScalarUDFImpl for ArrayElement {
165165
match &arg_types[0] {
166166
Null => Ok(Null),
167167
List(field) | LargeList(field) => Ok(field.data_type().clone()),
168-
arg_type => plan_err!(
169-
"{} does not support an argument of type {arg_type}",
170-
self.name()
171-
),
168+
arg_type => plan_err!("{} does not support type {arg_type}", self.name()),
172169
}
173170
}
174171

@@ -211,7 +208,7 @@ fn array_element_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
211208
general_array_element::<i64>(array, indexes)
212209
}
213210
arg_type => {
214-
exec_err!("array_element does not support an argument of type {arg_type}")
211+
exec_err!("array_element does not support type {arg_type}")
215212
}
216213
}
217214
}

0 commit comments

Comments
 (0)