Skip to content

Commit

Permalink
feat: support array_reverse (#9023)
Browse files Browse the repository at this point in the history
* support array_reverse

* fix typo

* add test

* fix NULL

* fix parse_expr

* fix typo

* fix null in column

* fix null

* add md

* fix ci

* add test for fixedsizelist

* skip null and speed up

* fix fmt

* fix clippy

* reduce code complex
  • Loading branch information
Weijun-H authored Jan 31, 2024
1 parent feeee04 commit 15f59d9
Show file tree
Hide file tree
Showing 11 changed files with 167 additions and 1 deletion.
6 changes: 6 additions & 0 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ pub enum BuiltinScalarFunction {
ArrayReplaceN,
/// array_replace_all
ArrayReplaceAll,
/// array_reverse
ArrayReverse,
/// array_slice
ArraySlice,
/// array_to_string
Expand Down Expand Up @@ -427,6 +429,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplace => Volatility::Immutable,
BuiltinScalarFunction::ArrayReplaceN => Volatility::Immutable,
BuiltinScalarFunction::ArrayReplaceAll => Volatility::Immutable,
BuiltinScalarFunction::ArrayReverse => Volatility::Immutable,
BuiltinScalarFunction::Flatten => Volatility::Immutable,
BuiltinScalarFunction::ArraySlice => Volatility::Immutable,
BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
Expand Down Expand Up @@ -622,6 +625,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplace => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayReplaceN => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayReplaceAll => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayReverse => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArraySlice => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayResize => Ok(input_expr_types[0].clone()),
BuiltinScalarFunction::ArrayToString => Ok(Utf8),
Expand Down Expand Up @@ -961,6 +965,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplaceAll => {
Signature::any(3, self.volatility())
}
BuiltinScalarFunction::ArrayReverse => Signature::any(1, self.volatility()),
BuiltinScalarFunction::ArraySlice => {
Signature::variadic_any(self.volatility())
}
Expand Down Expand Up @@ -1567,6 +1572,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::ArrayReplaceAll => {
&["array_replace_all", "list_replace_all"]
}
BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"],
BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"],
BuiltinScalarFunction::ArrayToString => &[
"array_to_string",
Expand Down
6 changes: 6 additions & 0 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,12 @@ scalar_expr!(
array from to,
"replaces all occurrences of the specified element with another specified element."
);
scalar_expr!(
ArrayReverse,
array_reverse,
array,
"reverses the order of elements in the array."
);
scalar_expr!(
ArraySlice,
array_slice,
Expand Down
69 changes: 69 additions & 0 deletions datafusion/physical-expr/src/array_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2766,6 +2766,75 @@ where
)?))
}

/// array_reverse SQL function
pub fn array_reverse(arg: &[ArrayRef]) -> Result<ArrayRef> {
if arg.len() != 1 {
return exec_err!("array_reverse needs one argument");
}

match &arg[0].data_type() {
DataType::List(field) => {
let array = as_list_array(&arg[0])?;
general_array_reverse::<i32>(array, field)
}
DataType::LargeList(field) => {
let array = as_large_list_array(&arg[0])?;
general_array_reverse::<i64>(array, field)
}
DataType::Null => Ok(arg[0].clone()),
array_type => exec_err!("array_reverse does not support type '{array_type:?}'."),
}
}

fn general_array_reverse<O: OffsetSizeTrait>(
array: &GenericListArray<O>,
field: &FieldRef,
) -> Result<ArrayRef>
where
O: TryFrom<i64>,
{
let values = array.values();
let original_data = values.to_data();
let capacity = Capacities::Array(original_data.len());
let mut offsets = vec![O::usize_as(0)];
let mut nulls = vec![];
let mut mutable =
MutableArrayData::with_capacities(vec![&original_data], false, capacity);

for (row_index, offset_window) in array.offsets().windows(2).enumerate() {
// skip the null value
if array.is_null(row_index) {
nulls.push(false);
offsets.push(offsets[row_index] + O::one());
mutable.extend(0, 0, 1);
continue;
} else {
nulls.push(true);
}

let start = offset_window[0];
let end = offset_window[1];

let mut index = end - O::one();
let mut cnt = 0;

while index >= start {
mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1);
index = index - O::one();
cnt += 1;
}
offsets.push(offsets[row_index] + O::usize_as(cnt));
}

let data = mutable.freeze();
Ok(Arc::new(GenericListArray::<O>::try_new(
field.clone(),
OffsetBuffer::<O>::new(offsets.into()),
arrow_array::make_array(data),
Some(nulls.into()),
)?))
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
3 changes: 3 additions & 0 deletions datafusion/physical-expr/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,9 @@ pub fn create_physical_fun(
BuiltinScalarFunction::ArrayReplaceAll => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_replace_all)(args)
}),
BuiltinScalarFunction::ArrayReverse => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_reverse)(args)
}),
BuiltinScalarFunction::ArraySlice => Arc::new(|args| {
make_scalar_function_inner(array_expressions::array_slice)(args)
}),
Expand Down
1 change: 1 addition & 0 deletions datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ enum ScalarFunction {
EndsWith = 131;
InStr = 132;
MakeDate = 133;
ArrayReverse = 134;
}

message ScalarFunctionNode {
Expand Down
3 changes: 3 additions & 0 deletions datafusion/proto/src/generated/pbjson.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions datafusion/proto/src/generated/prost.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion datafusion/proto/src/logical_plan/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ use datafusion_common::{
Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference,
Result, ScalarValue,
};
use datafusion_expr::expr::{Alias, Placeholder};
use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by};
use datafusion_expr::{
abs, acos, acosh, array, array_append, array_concat, array_dims, array_distinct,
Expand Down Expand Up @@ -72,6 +71,10 @@ use datafusion_expr::{
JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound,
WindowFrameUnits,
};
use datafusion_expr::{
array_reverse,
expr::{Alias, Placeholder},
};

#[derive(Debug)]
pub enum Error {
Expand Down Expand Up @@ -501,6 +504,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
ScalarFunction::ArrayReplace => Self::ArrayReplace,
ScalarFunction::ArrayReplaceN => Self::ArrayReplaceN,
ScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll,
ScalarFunction::ArrayReverse => Self::ArrayReverse,
ScalarFunction::ArraySlice => Self::ArraySlice,
ScalarFunction::ArrayToString => Self::ArrayToString,
ScalarFunction::ArrayIntersect => Self::ArrayIntersect,
Expand Down Expand Up @@ -1458,6 +1462,9 @@ pub fn parse_expr(
parse_expr(&args[1], registry)?,
parse_expr(&args[2], registry)?,
)),
ScalarFunction::ArrayReverse => {
Ok(array_reverse(parse_expr(&args[0], registry)?))
}
ScalarFunction::ArraySlice => Ok(array_slice(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
Expand Down
1 change: 1 addition & 0 deletions datafusion/proto/src/logical_plan/to_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1500,6 +1500,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
BuiltinScalarFunction::ArrayReplace => Self::ArrayReplace,
BuiltinScalarFunction::ArrayReplaceN => Self::ArrayReplaceN,
BuiltinScalarFunction::ArrayReplaceAll => Self::ArrayReplaceAll,
BuiltinScalarFunction::ArrayReverse => Self::ArrayReverse,
BuiltinScalarFunction::ArraySlice => Self::ArraySlice,
BuiltinScalarFunction::ArrayToString => Self::ArrayToString,
BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect,
Expand Down
34 changes: 34 additions & 0 deletions datafusion/sqllogictest/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -5054,6 +5054,40 @@ select array_resize(arrow_cast([[1], [2], [3]], 'LargeList(List(Int64))'), 10, [
----
[[1], [2], [3], [5], [5], [5], [5], [5], [5], [5]]

## array_reverse
query ??
select array_reverse(make_array(1, 2, 3)), array_reverse(make_array(1));
----
[3, 2, 1] [1]

query ??
select array_reverse(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_reverse(arrow_cast(make_array(1), 'LargeList(Int64)'));
----
[3, 2, 1] [1]

#TODO: support after FixedSizeList type coercion
#query ??
#select array_reverse(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_reverse(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
#----
#[3, 2, 1] [1]

query ??
select array_reverse(NULL), array_reverse([]);
----
NULL []

query ??
select array_reverse(column1), column1 from arrays_values;
----
[10, 9, 8, 7, 6, 5, 4, 3, 2, ] [, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[20, , 18, 17, 16, 15, 14, 13, 12, 11] [11, 12, 13, 14, 15, 16, 17, 18, , 20]
[30, 29, 28, 27, 26, 25, , 23, 22, 21] [21, 22, 23, , 25, 26, 27, 28, 29, 30]
[40, 39, 38, 37, , 35, 34, 33, 32, 31] [31, 32, 33, 34, 35, , 37, 38, 39, 40]
NULL NULL
[50, 49, 48, 47, 46, 45, 44, 43, 42, 41] [41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
[60, 59, 58, 57, 56, 55, 54, , 52, 51] [51, 52, , 54, 55, 56, 57, 58, 59, 60]
[70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70]

### Delete tables

statement ok
Expand Down
33 changes: 33 additions & 0 deletions docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -1793,6 +1793,7 @@ from_unixtime(expression)
- [array_replace](#array_replace)
- [array_replace_n](#array_replace_n)
- [array_replace_all](#array_replace_all)
- [array_reverse](#array_reverse)
- [array_slice](#array_slice)
- [array_to_string](#array_to_string)
- [cardinality](#cardinality)
Expand Down Expand Up @@ -2523,6 +2524,34 @@ array_replace_all(array, from, to)

- list_replace_all

### `array_reverse`

Returns the array with the order of the elements reversed.

```
array_reverse(array)
```

#### Arguments

- **array**: Array expression.
Can be a constant, column, or function, and any combination of array operators.

#### Example

```
❯ select array_reverse([1, 2, 3, 4]);
+------------------------------------------------------------+
| array_reverse(List([1, 2, 3, 4])) |
+------------------------------------------------------------+
| [4, 3, 2, 1] |
+------------------------------------------------------------+
```

#### Aliases

- list_reverse

### `array_slice`

Returns a slice of the array based on 1-indexed start and end positions.
Expand Down Expand Up @@ -2823,6 +2852,10 @@ _Alias of [array_replace_n](#array_replace_n)._

_Alias of [array_replace_all](#array_replace_all)._

### `list_reverse`

_Alias of [array_reverse](#array_reverse)._

### `list_slice`

_Alias of [array_slice](#array_slice)._
Expand Down

0 comments on commit 15f59d9

Please sign in to comment.