Skip to content

Commit

Permalink
feat: make_array support empty arguments (apache#6593)
Browse files Browse the repository at this point in the history
* feat: make_array support empty arguments

* fix fmt error

* fix error

* array_append support empty array

* array_prepend support empty make_array

* array_concat support empty make_array

* fix clippy

* update

* fix

* rename `array_make` --> `make_array`

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
2 people authored and jayzhan211 committed Jun 12, 2023
1 parent 5e8648f commit 3b8d46b
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 34 deletions.
102 changes: 99 additions & 3 deletions datafusion/core/tests/sqllogictests/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,49 @@ select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)
----
[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]]

# array_append scalar function
# array scalar function #6
query ? rowsort
select make_array()
----
[]

# array scalar function #7
query ?? rowsort
select make_array(make_array()), make_array(make_array(make_array()))
----
[[]] [[[]]]

# array_append scalar function #1
query ? rowsort
select array_append(make_array(), 4);
----
[4]

# array_append scalar function #2
query ?? rowsort
select array_append(make_array(), make_array()), array_append(make_array(), make_array(4));
----
[[]] [[4]]

# array_append scalar function #3
query ??? rowsort
select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o');
----
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]

# array_prepend scalar function
# array_prepend scalar function #1
query ? rowsort
select array_prepend(4, make_array());
----
[4]

# array_prepend scalar function #2
query ?? rowsort
select array_prepend(make_array(), make_array()), array_prepend(make_array(4), make_array());
----
[[]] [[4]]

# array_prepend scalar function #3
query ??? rowsort
select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o'));
----
Expand All @@ -73,6 +109,12 @@ select array_fill(1, make_array(1, 1, 1)), array_fill(2, make_array(2, 2, 2, 2,
----
[[[1]]] [[[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]], [[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]]]

# array_fill scalar function #3
query ?
select array_fill(1, make_array())
----
[]

# array_concat scalar function #1
query ?? rowsort
select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4]));
Expand All @@ -97,6 +139,18 @@ select array_concat(make_array([[1]]), make_array([[2]]));
----
[[[1]], [[2]]]

# array_concat scalar function #5
query ? rowsort
select array_concat(make_array(2, 3), make_array());
----
[2, 3]

# array_concat scalar function #6
query ? rowsort
select array_concat(make_array(), make_array(2, 3));
----
[2, 3]

# array_position scalar function #1
query III
select array_position(['h', 'e', 'l', 'l', 'o'], 'l'), array_position([1, 2, 3, 4, 5], 5), array_position([1, 1, 1], 1);
Expand Down Expand Up @@ -133,6 +187,12 @@ select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]]
----
11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3

# array_to_string scalar function #3
query ?
select array_to_string(make_array(), ',')
----
(empty)

# cardinality scalar function
query III
select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinality(make_array('h', 'e', 'l', 'l', 'o'));
Expand All @@ -145,7 +205,13 @@ select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_fill(3
----
6 18

# trim_array scalar function
# cardinality scalar function #3
query II
select cardinality(make_array()), cardinality(make_array(make_array()))
----
0 0

# trim_array scalar function #1
query ???
select trim_array(make_array(1, 2, 3, 4, 5), 2), trim_array(['h', 'e', 'l', 'l', 'o'], 3), trim_array([1.0, 2.0, 3.0], 2);
----
Expand All @@ -157,6 +223,18 @@ select trim_array([[1, 2], [3, 4], [5, 6]], 2), trim_array(array_fill(4, [3, 4,
----
[[1, 2]] [[[4, 4], [4, 4], [4, 4], [4, 4]]]

# trim_array scalar function #3
query ?
select array_concat(trim_array(make_array(1, 2, 3), 3), make_array(4, 5), make_array());
----
[4, 5]

# trim_array scalar function #4
query ??
select trim_array(make_array(), 0), trim_array(make_array(), 1)
----
[] []

# array_length scalar function
query III rowsort
select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6]));
Expand All @@ -181,6 +259,12 @@ select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3,
----
3 2 5 NULL

# array_length scalar function #5
query III rowsort
select array_length(make_array()), array_length(make_array(), 1), array_length(make_array(), 2)
----
0 0 NULL

# array_dims scalar function
query III rowsort
select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), array_dims(make_array([[[[1], [2]]]]));
Expand All @@ -193,6 +277,12 @@ select array_dims(array_fill(2, [1, 2, 3])), array_dims(array_fill(3, [2, 5, 4])
----
[1, 2, 3] [2, 5, 4]

# array_dims scalar function #3
query II rowsort
select array_dims(make_array()), array_dims(make_array(make_array()))
----
[0] [1, 0]

# array_ndims scalar function
query III rowsort
select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])), array_ndims(make_array([[[[1], [2]]]]));
Expand All @@ -209,3 +299,9 @@ query ?
select make_array(1, 2.0)
----
[1.0, 2.0]
# array_ndims scalar function #3

query II rowsort
select array_ndims(make_array()), array_ndims(make_array(make_array()))
----
1 2
14 changes: 9 additions & 5 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::CurrentDate
| BuiltinScalarFunction::CurrentTime
| BuiltinScalarFunction::Uuid
| BuiltinScalarFunction::MakeArray
)
}
/// Returns the [Volatility] of the builtin function.
Expand Down Expand Up @@ -510,11 +511,14 @@ impl BuiltinScalarFunction {
))),
},
BuiltinScalarFunction::Cardinality => Ok(UInt64),
BuiltinScalarFunction::MakeArray => Ok(List(Arc::new(Field::new(
"item",
input_expr_types[0].clone(),
true,
)))),
BuiltinScalarFunction::MakeArray => match input_expr_types.len() {
0 => Ok(List(Arc::new(Field::new("item", Null, true)))),
_ => Ok(List(Arc::new(Field::new(
"item",
input_expr_types[0].clone(),
true,
)))),
},
BuiltinScalarFunction::TrimArray => match &input_expr_types[0] {
List(field) => Ok(List(Arc::new(Field::new(
"item",
Expand Down
82 changes: 57 additions & 25 deletions datafusion/physical-expr/src/array_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,15 @@ pub fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
Ok(ColumnarValue::Array(array_array(arrays.as_slice())?))
}

/// `make_array` SQL function
pub fn make_array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
match values[0].data_type() {
DataType::Null => Ok(datafusion_expr::ColumnarValue::Scalar(
ScalarValue::new_list(Some(vec![]), DataType::Null),
)),
_ => array(values),
}
}
macro_rules! downcast_arg {
($ARG:expr, $ARRAY_TYPE:ident) => {{
$ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
Expand Down Expand Up @@ -230,6 +239,7 @@ pub fn array_append(args: &[ColumnarValue]) -> Result<ColumnarValue> {
(DataType::UInt16, DataType::UInt16) => append!(arr, element, UInt16Array),
(DataType::UInt32, DataType::UInt32) => append!(arr, element, UInt32Array),
(DataType::UInt64, DataType::UInt64) => append!(arr, element, UInt64Array),
(DataType::Null, _) => return array(&args[1..]),
(array_data_type, element_data_type) => {
return Err(DataFusionError::NotImplemented(format!(
"Array_append is not implemented for types '{array_data_type:?}' and '{element_data_type:?}'."
Expand Down Expand Up @@ -303,6 +313,7 @@ pub fn array_prepend(args: &[ColumnarValue]) -> Result<ColumnarValue> {
(DataType::UInt16, DataType::UInt16) => prepend!(arr, element, UInt16Array),
(DataType::UInt32, DataType::UInt32) => prepend!(arr, element, UInt32Array),
(DataType::UInt64, DataType::UInt64) => prepend!(arr, element, UInt64Array),
(DataType::Null, _) => return array(&args[..1]),
(array_data_type, element_data_type) => {
return Err(DataFusionError::NotImplemented(format!(
"Array_prepend is not implemented for types '{array_data_type:?}' and '{element_data_type:?}'."
Expand Down Expand Up @@ -331,30 +342,36 @@ pub fn array_concat(args: &[ColumnarValue]) -> Result<ColumnarValue> {
.collect();
let data_type = arrays[0].data_type();
match data_type {
DataType::List(..) => {
let list_arrays =
downcast_vec!(arrays, ListArray).collect::<Result<Vec<&ListArray>>>()?;
let len: usize = list_arrays.iter().map(|a| a.values().len()).sum();
let capacity = Capacities::Array(list_arrays.iter().map(|a| a.len()).sum());
let array_data: Vec<_> =
list_arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
let array_data = array_data.iter().collect();
let mut mutable =
MutableArrayData::with_capacities(array_data, false, capacity);

for (i, a) in list_arrays.iter().enumerate() {
mutable.extend(i, 0, a.len())
}
DataType::List(field) => match field.data_type() {
DataType::Null => array_concat(&args[1..]),
_ => {
let list_arrays = downcast_vec!(arrays, ListArray)
.collect::<Result<Vec<&ListArray>>>()?;
let len: usize = list_arrays.iter().map(|a| a.values().len()).sum();
let capacity =
Capacities::Array(list_arrays.iter().map(|a| a.len()).sum());
let array_data: Vec<_> =
list_arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
let array_data = array_data.iter().collect();
let mut mutable =
MutableArrayData::with_capacities(array_data, false, capacity);

for (i, a) in list_arrays.iter().enumerate() {
mutable.extend(i, 0, a.len())
}

let builder = mutable.into_builder();
let list = builder
.len(1)
.buffers(vec![Buffer::from_slice_ref([0, len as i32])])
.build()
.unwrap();
let builder = mutable.into_builder();
let list = builder
.len(1)
.buffers(vec![Buffer::from_slice_ref([0, len as i32])])
.build()
.unwrap();

return Ok(ColumnarValue::Array(Arc::new(make_array(list))));
}
return Ok(ColumnarValue::Array(Arc::new(arrow::array::make_array(
list,
))));
}
},
_ => Err(DataFusionError::NotImplemented(format!(
"Array is not type '{data_type:?}'."
))),
Expand Down Expand Up @@ -423,6 +440,11 @@ pub fn array_fill(args: &[ColumnarValue]) -> Result<ColumnarValue> {
DataType::UInt16 => fill!(array_values, element, UInt16Array),
DataType::UInt32 => fill!(array_values, element, UInt32Array),
DataType::UInt64 => fill!(array_values, element, UInt64Array),
DataType::Null => {
return Ok(datafusion_expr::ColumnarValue::Scalar(
ScalarValue::new_list(Some(vec![]), DataType::Null),
))
}
data_type => {
return Err(DataFusionError::Internal(format!(
"Array_fill is not implemented for type '{data_type:?}'."
Expand Down Expand Up @@ -836,6 +858,7 @@ pub fn array_to_string(args: &[ColumnarValue]) -> Result<ColumnarValue> {
DataType::UInt16 => to_string!(arg, arr, &delimeter, UInt16Array),
DataType::UInt32 => to_string!(arg, arr, &delimeter, UInt32Array),
DataType::UInt64 => to_string!(arg, arr, &delimeter, UInt64Array),
DataType::Null => Ok(arg),
data_type => Err(DataFusionError::NotImplemented(format!(
"Array is not implemented for type '{data_type:?}'."
))),
Expand All @@ -844,8 +867,13 @@ pub fn array_to_string(args: &[ColumnarValue]) -> Result<ColumnarValue> {

let mut arg = String::from("");
let mut res = compute_array_to_string(&mut arg, arr, delimeter.clone())?.clone();
res.truncate(res.len() - delimeter.len());
Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(res))))
match res.as_str() {
"" => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(res)))),
_ => {
res.truncate(res.len() - delimeter.len());
Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(res))))
}
}
}

/// Trim_array SQL function
Expand Down Expand Up @@ -884,8 +912,12 @@ pub fn trim_array(args: &[ColumnarValue]) -> Result<ColumnarValue> {

let list_array = downcast_arg!(arr, ListArray);
let values = list_array.value(0);
if values.len() <= n {
return Ok(datafusion_expr::ColumnarValue::Scalar(
ScalarValue::new_list(Some(vec![]), DataType::Null),
));
}
let res = values.slice(0, values.len() - n);

let mut scalars = vec![];
for i in 0..res.len() {
scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&res, i)?));
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ pub fn create_physical_fun(
Arc::new(array_expressions::array_to_string)
}
BuiltinScalarFunction::Cardinality => Arc::new(array_expressions::cardinality),
BuiltinScalarFunction::MakeArray => Arc::new(array_expressions::array),
BuiltinScalarFunction::MakeArray => Arc::new(array_expressions::make_array),
BuiltinScalarFunction::TrimArray => Arc::new(array_expressions::trim_array),

// string functions
Expand Down

0 comments on commit 3b8d46b

Please sign in to comment.