From 15f59d9861082a4d5d39bddce63d81cc7b9fb299 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Wed, 31 Jan 2024 19:33:40 +0800 Subject: [PATCH] feat: support array_reverse (#9023) * support array_reverse * fix typo * add test * fix NULL * fix parse_expr * fix typo * fix null in column * fix null * add md * fix ci * add test for fixedsizelist * skip null and speed up * fix fmt * fix clippy * reduce code complex --- datafusion/expr/src/built_in_function.rs | 6 ++ datafusion/expr/src/expr_fn.rs | 6 ++ .../physical-expr/src/array_expressions.rs | 69 +++++++++++++++++++ datafusion/physical-expr/src/functions.rs | 3 + datafusion/proto/proto/datafusion.proto | 1 + datafusion/proto/src/generated/pbjson.rs | 3 + datafusion/proto/src/generated/prost.rs | 3 + .../proto/src/logical_plan/from_proto.rs | 9 ++- datafusion/proto/src/logical_plan/to_proto.rs | 1 + datafusion/sqllogictest/test_files/array.slt | 34 +++++++++ .../source/user-guide/sql/scalar_functions.md | 33 +++++++++ 11 files changed, 167 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index f2eb82ebf9bd..b7bb17c86be7 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -171,6 +171,8 @@ pub enum BuiltinScalarFunction { ArrayReplaceN, /// array_replace_all ArrayReplaceAll, + /// array_reverse + ArrayReverse, /// array_slice ArraySlice, /// array_to_string @@ -427,6 +429,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplace => Volatility::Immutable, BuiltinScalarFunction::ArrayReplaceN => Volatility::Immutable, BuiltinScalarFunction::ArrayReplaceAll => Volatility::Immutable, + BuiltinScalarFunction::ArrayReverse => Volatility::Immutable, BuiltinScalarFunction::Flatten => Volatility::Immutable, BuiltinScalarFunction::ArraySlice => Volatility::Immutable, BuiltinScalarFunction::ArrayToString => Volatility::Immutable, @@ -622,6 +625,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplace => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayReplaceN => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayReplaceAll => Ok(input_expr_types[0].clone()), + BuiltinScalarFunction::ArrayReverse => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArraySlice => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayResize => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayToString => Ok(Utf8), @@ -961,6 +965,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplaceAll => { Signature::any(3, self.volatility()) } + BuiltinScalarFunction::ArrayReverse => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArraySlice => { Signature::variadic_any(self.volatility()) } @@ -1567,6 +1572,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReplaceAll => { &["array_replace_all", "list_replace_all"] } + BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"], BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"], BuiltinScalarFunction::ArrayToString => &[ "array_to_string", diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4608badde231..877066aabfed 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -728,6 +728,12 @@ scalar_expr!( array from to, "replaces all occurrences of the specified element with another specified element." ); +scalar_expr!( + ArrayReverse, + array_reverse, + array, + "reverses the order of elements in the array." +); scalar_expr!( ArraySlice, array_slice, diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index a3dec2762c10..844dae0917c7 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -2766,6 +2766,75 @@ where )?)) } +/// array_reverse SQL function +pub fn array_reverse(arg: &[ArrayRef]) -> Result { + if arg.len() != 1 { + return exec_err!("array_reverse needs one argument"); + } + + match &arg[0].data_type() { + DataType::List(field) => { + let array = as_list_array(&arg[0])?; + general_array_reverse::(array, field) + } + DataType::LargeList(field) => { + let array = as_large_list_array(&arg[0])?; + general_array_reverse::(array, field) + } + DataType::Null => Ok(arg[0].clone()), + array_type => exec_err!("array_reverse does not support type '{array_type:?}'."), + } +} + +fn general_array_reverse( + array: &GenericListArray, + field: &FieldRef, +) -> Result +where + O: TryFrom, +{ + let values = array.values(); + let original_data = values.to_data(); + let capacity = Capacities::Array(original_data.len()); + let mut offsets = vec![O::usize_as(0)]; + let mut nulls = vec![]; + let mut mutable = + MutableArrayData::with_capacities(vec![&original_data], false, capacity); + + for (row_index, offset_window) in array.offsets().windows(2).enumerate() { + // skip the null value + if array.is_null(row_index) { + nulls.push(false); + offsets.push(offsets[row_index] + O::one()); + mutable.extend(0, 0, 1); + continue; + } else { + nulls.push(true); + } + + let start = offset_window[0]; + let end = offset_window[1]; + + let mut index = end - O::one(); + let mut cnt = 0; + + while index >= start { + mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1); + index = index - O::one(); + cnt += 1; + } + offsets.push(offsets[row_index] + O::usize_as(cnt)); + } + + let data = mutable.freeze(); + Ok(Arc::new(GenericListArray::::try_new( + field.clone(), + OffsetBuffer::::new(offsets.into()), + arrow_array::make_array(data), + Some(nulls.into()), + )?)) +} + #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index cd4e6f96f0fe..21eaeab7213a 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -445,6 +445,9 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayReplaceAll => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_replace_all)(args) }), + BuiltinScalarFunction::ArrayReverse => Arc::new(|args| { + make_scalar_function_inner(array_expressions::array_reverse)(args) + }), BuiltinScalarFunction::ArraySlice => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_slice)(args) }), diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 0b93820db841..f2b5c5dd4239 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -670,6 +670,7 @@ enum ScalarFunction { EndsWith = 131; InStr = 132; MakeDate = 133; + ArrayReverse = 134; } message ScalarFunctionNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 55e83a885382..b9a8c5fc0782 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22422,6 +22422,7 @@ impl serde::Serialize for ScalarFunction { Self::EndsWith => "EndsWith", Self::InStr => "InStr", Self::MakeDate => "MakeDate", + Self::ArrayReverse => "ArrayReverse", }; serializer.serialize_str(variant) } @@ -22565,6 +22566,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "EndsWith", "InStr", "MakeDate", + "ArrayReverse", ]; struct GeneratedVisitor; @@ -22737,6 +22739,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "EndsWith" => Ok(ScalarFunction::EndsWith), "InStr" => Ok(ScalarFunction::InStr), "MakeDate" => Ok(ScalarFunction::MakeDate), + "ArrayReverse" => Ok(ScalarFunction::ArrayReverse), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index b17bcd3a49d7..758ef2dcb5f3 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2765,6 +2765,7 @@ pub enum ScalarFunction { EndsWith = 131, InStr = 132, MakeDate = 133, + ArrayReverse = 134, } impl ScalarFunction { /// String value of the enum field names used in the ProtoBuf definition. @@ -2905,6 +2906,7 @@ impl ScalarFunction { ScalarFunction::EndsWith => "EndsWith", ScalarFunction::InStr => "InStr", ScalarFunction::MakeDate => "MakeDate", + ScalarFunction::ArrayReverse => "ArrayReverse", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -3042,6 +3044,7 @@ impl ScalarFunction { "EndsWith" => Some(Self::EndsWith), "InStr" => Some(Self::InStr), "MakeDate" => Some(Self::MakeDate), + "ArrayReverse" => Some(Self::ArrayReverse), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index b025f79bd1d0..decf3b18745f 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -44,7 +44,6 @@ use datafusion_common::{ Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result, ScalarValue, }; -use datafusion_expr::expr::{Alias, Placeholder}; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ abs, acos, acosh, array, array_append, array_concat, array_dims, array_distinct, @@ -72,6 +71,10 @@ use datafusion_expr::{ JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, }; +use datafusion_expr::{ + array_reverse, + expr::{Alias, Placeholder}, +}; #[derive(Debug)] pub enum Error { @@ -501,6 +504,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ArrayReplace => Self::ArrayReplace, ScalarFunction::ArrayReplaceN => Self::ArrayReplaceN, ScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll, + ScalarFunction::ArrayReverse => Self::ArrayReverse, ScalarFunction::ArraySlice => Self::ArraySlice, ScalarFunction::ArrayToString => Self::ArrayToString, ScalarFunction::ArrayIntersect => Self::ArrayIntersect, @@ -1458,6 +1462,9 @@ pub fn parse_expr( parse_expr(&args[1], registry)?, parse_expr(&args[2], registry)?, )), + ScalarFunction::ArrayReverse => { + Ok(array_reverse(parse_expr(&args[0], registry)?)) + } ScalarFunction::ArraySlice => Ok(array_slice( parse_expr(&args[0], registry)?, parse_expr(&args[1], registry)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index f7be15136bbb..e094994840b2 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1500,6 +1500,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArrayReplace => Self::ArrayReplace, BuiltinScalarFunction::ArrayReplaceN => Self::ArrayReplaceN, BuiltinScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll, + BuiltinScalarFunction::ArrayReverse => Self::ArrayReverse, BuiltinScalarFunction::ArraySlice => Self::ArraySlice, BuiltinScalarFunction::ArrayToString => Self::ArrayToString, BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect, diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index e072e4146f13..e6a8181be1ac 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -5054,6 +5054,40 @@ select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [ ---- [[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]] +## array_reverse +query ?? +select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1)); +---- +[3, 2, 1] [1] + +query ?? +select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_reverse(arrow_cast(make_array(1), 'LargeList(Int64)')); +---- +[3, 2, 1] [1] + +#TODO: support after FixedSizeList type coercion +#query ?? +#select array_reverse(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_reverse(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +#---- +#[3, 2, 1] [1] + +query ?? +select array_reverse(NULL), array_reverse([]); +---- +NULL [] + +query ?? +select array_reverse(column1), column1 from arrays_values; +---- +[10, 9, 8, 7, 6, 5, 4, 3, 2, ] [, 2, 3, 4, 5, 6, 7, 8, 9, 10] +[20, , 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, , 20] +[30, 29, 28, 27, 26, 25, , 23, 22, 21] [21, 22, 23, , 25, 26, 27, 28, 29, 30] +[40, 39, 38, 37, , 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, , 37, 38, 39, 40] +NULL NULL +[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[60, 59, 58, 57, 56, 55, 54, , 52, 51] [51, 52, , 54, 55, 56, 57, 58, 59, 60] +[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + ### Delete tables statement ok diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 7bec80b55e26..ba69b53e69af 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1793,6 +1793,7 @@ from_unixtime(expression) - [array_replace](#array_replace) - [array_replace_n](#array_replace_n) - [array_replace_all](#array_replace_all) +- [array_reverse](#array_reverse) - [array_slice](#array_slice) - [array_to_string](#array_to_string) - [cardinality](#cardinality) @@ -2523,6 +2524,34 @@ array_replace_all(array, from, to) - list_replace_all +### `array_reverse` + +Returns the array with the order of the elements reversed. + +``` +array_reverse(array) +``` + +#### Arguments + +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +#### Example + +``` +❯ select array_reverse([1, 2, 3, 4]); ++------------------------------------------------------------+ +| array_reverse(List([1, 2, 3, 4])) | ++------------------------------------------------------------+ +| [4, 3, 2, 1] | ++------------------------------------------------------------+ +``` + +#### Aliases + +- list_reverse + ### `array_slice` Returns a slice of the array based on 1-indexed start and end positions. @@ -2823,6 +2852,10 @@ _Alias of [array_replace_n](#array_replace_n)._ _Alias of [array_replace_all](#array_replace_all)._ +### `list_reverse` + +_Alias of [array_reverse](#array_reverse)._ + ### `list_slice` _Alias of [array_slice](#array_slice)._