-
Notifications
You must be signed in to change notification settings - Fork 1.5k
'Rename array()
function to make_array()
, extend array[]
#3122
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -111,8 +111,8 @@ async fn query_concat() -> Result<()> { | |
Ok(()) | ||
} | ||
|
||
#[tokio::test] | ||
async fn query_array() -> Result<()> { | ||
// Return a session context with table "test" registered with 2 columns | ||
fn array_context() -> SessionContext { | ||
let schema = Arc::new(Schema::new(vec![ | ||
Field::new("c1", DataType::Utf8, false), | ||
Field::new("c2", DataType::Int32, true), | ||
|
@@ -124,43 +124,110 @@ async fn query_array() -> Result<()> { | |
Arc::new(StringArray::from_slice(&["", "a", "aa", "aaa"])), | ||
Arc::new(Int32Array::from(vec![Some(0), Some(1), None, Some(3)])), | ||
], | ||
)?; | ||
) | ||
.unwrap(); | ||
|
||
let table = MemTable::try_new(schema, vec![vec![data]])?; | ||
let table = MemTable::try_new(schema, vec![vec![data]]).unwrap(); | ||
|
||
let ctx = SessionContext::new(); | ||
ctx.register_table("test", Arc::new(table))?; | ||
let sql = "SELECT array(c1, cast(c2 as varchar)) FROM test"; | ||
ctx.register_table("test", Arc::new(table)).unwrap(); | ||
ctx | ||
} | ||
|
||
#[tokio::test] | ||
async fn query_array() { | ||
let ctx = array_context(); | ||
let sql = "SELECT array[c1, cast(c2 as varchar)] FROM test"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Prior to this PR, this query would error (only constants were supported in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
let actual = execute_to_batches(&ctx, sql).await; | ||
let expected = vec![ | ||
"+--------------------------------------+", | ||
"| array(test.c1,CAST(test.c2 AS Utf8)) |", | ||
"+--------------------------------------+", | ||
"| [, 0] |", | ||
"| [a, 1] |", | ||
"| [aa, ] |", | ||
"| [aaa, 3] |", | ||
"+--------------------------------------+", | ||
"+----------+", | ||
"| array |", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. using |
||
"+----------+", | ||
"| [, 0] |", | ||
"| [a, 1] |", | ||
"| [aa, ] |", | ||
"| [aaa, 3] |", | ||
"+----------+", | ||
]; | ||
assert_batches_eq!(expected, &actual); | ||
} | ||
|
||
#[tokio::test] | ||
async fn query_make_array() { | ||
let ctx = array_context(); | ||
let sql = "SELECT make_array(c1, cast(c2 as varchar)) FROM test"; | ||
let actual = execute_to_batches(&ctx, sql).await; | ||
let expected = vec![ | ||
"+------------------------------------------+", | ||
"| makearray(test.c1,CAST(test.c2 AS Utf8)) |", | ||
"+------------------------------------------+", | ||
"| [, 0] |", | ||
"| [a, 1] |", | ||
"| [aa, ] |", | ||
"| [aaa, 3] |", | ||
"+------------------------------------------+", | ||
]; | ||
assert_batches_eq!(expected, &actual); | ||
Ok(()) | ||
} | ||
|
||
#[tokio::test] | ||
async fn query_array_scalar() -> Result<()> { | ||
async fn query_array_scalar() { | ||
let ctx = SessionContext::new(); | ||
|
||
let sql = "SELECT array(1, 2, 3);"; | ||
let sql = "SELECT array[1, 2, 3];"; | ||
let actual = execute_to_batches(&ctx, sql).await; | ||
let expected = vec![ | ||
"+-----------------------------------+", | ||
"| array(Int64(1),Int64(2),Int64(3)) |", | ||
"+-----------------------------------+", | ||
"| [1, 2, 3] |", | ||
"+-----------------------------------+", | ||
"+-----------+", | ||
"| array |", | ||
"+-----------+", | ||
"| [1, 2, 3] |", | ||
"+-----------+", | ||
]; | ||
assert_batches_eq!(expected, &actual); | ||
|
||
// alternate syntax format | ||
let sql = "SELECT [1, 2, 3];"; | ||
let actual = execute_to_batches(&ctx, sql).await; | ||
assert_batches_eq!(expected, &actual); | ||
} | ||
|
||
#[tokio::test] | ||
async fn query_array_scalar_bad_types() { | ||
let ctx = SessionContext::new(); | ||
|
||
// no common type to coerce to, should error | ||
let err = plan_and_collect(&ctx, "SELECT [1, true, null]") | ||
.await | ||
.unwrap_err(); | ||
assert_eq!(err.to_string(), "Error during planning: Coercion from [Int64, Boolean, Null] to the signature VariadicEqual failed.",); | ||
} | ||
|
||
#[tokio::test] | ||
async fn query_array_scalar_coerce() { | ||
let ctx = SessionContext::new(); | ||
|
||
// The planner should be able to coerce this to all integers | ||
// https://github.com/apache/arrow-datafusion/issues/3170 | ||
let err = plan_and_collect(&ctx, "SELECT [1, 2, '3']") | ||
.await | ||
.unwrap_err(); | ||
assert_eq!(err.to_string(), "Error during planning: Coercion from [Int64, Int64, Utf8] to the signature VariadicEqual failed.",); | ||
} | ||
|
||
#[tokio::test] | ||
async fn query_make_array_scalar() { | ||
let ctx = SessionContext::new(); | ||
|
||
let sql = "SELECT make_array(1, 2, 3);"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the equivalent of |
||
let actual = execute_to_batches(&ctx, sql).await; | ||
let expected = vec![ | ||
"+---------------------------------------+", | ||
"| makearray(Int64(1),Int64(2),Int64(3)) |", | ||
"+---------------------------------------+", | ||
"| [1, 2, 3] |", | ||
"+---------------------------------------+", | ||
]; | ||
assert_batches_eq!(expected, &actual); | ||
Ok(()) | ||
} | ||
|
||
#[tokio::test] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,8 +21,8 @@ use crate::nullif::SUPPORTED_NULLIF_TYPES; | |
use crate::type_coercion::data_types; | ||
use crate::ColumnarValue; | ||
use crate::{ | ||
array_expressions, conditional_expressions, struct_expressions, Accumulator, | ||
BuiltinScalarFunction, Signature, TypeSignature, | ||
conditional_expressions, struct_expressions, Accumulator, BuiltinScalarFunction, | ||
Signature, TypeSignature, | ||
}; | ||
use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit}; | ||
use datafusion_common::{DataFusionError, Result}; | ||
|
@@ -96,7 +96,7 @@ pub fn return_type( | |
// the return type of the built in function. | ||
// Some built-in functions' return type depends on the incoming type. | ||
match fun { | ||
BuiltinScalarFunction::Array => Ok(DataType::FixedSizeList( | ||
BuiltinScalarFunction::MakeArray => Ok(DataType::FixedSizeList( | ||
Box::new(Field::new("item", input_expr_types[0].clone(), true)), | ||
input_expr_types.len() as i32, | ||
)), | ||
|
@@ -267,12 +267,8 @@ pub fn return_type( | |
pub fn signature(fun: &BuiltinScalarFunction) -> Signature { | ||
// note: the physical expression must accept the type returned by this function or the execution panics. | ||
|
||
// for now, the list is small, as we do not have many built-in functions. | ||
match fun { | ||
BuiltinScalarFunction::Array => Signature::variadic( | ||
array_expressions::SUPPORTED_ARRAY_TYPES.to_vec(), | ||
fun.volatility(), | ||
), | ||
BuiltinScalarFunction::MakeArray => Signature::variadic_equal(fun.volatility()), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This requires all arguments to |
||
BuiltinScalarFunction::Struct => Signature::variadic( | ||
struct_expressions::SUPPORTED_STRUCT_TYPES.to_vec(), | ||
fun.volatility(), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,7 +34,10 @@ macro_rules! downcast_vec { | |
}}; | ||
} | ||
|
||
macro_rules! array { | ||
/// Create an array of FixedSizeList from a set of individual Arrays | ||
/// where each element in the output FixedSizeList is the result of | ||
/// concatenating the corresponding values in the input Arrays | ||
macro_rules! make_fixed_size_list { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I renamed this to be more descriptive of what it does (there are too may arrays in DataFusion already ;) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I simply renamed this macro to something that shows more of what it is doing |
||
($ARGS:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{ | ||
// downcast all arguments to their common format | ||
let args = | ||
|
@@ -59,7 +62,7 @@ macro_rules! array { | |
}}; | ||
} | ||
|
||
fn array_array(args: &[ArrayRef]) -> Result<ArrayRef> { | ||
fn arrays_to_fixed_size_list_array(args: &[ArrayRef]) -> Result<ArrayRef> { | ||
// do not accept 0 arguments. | ||
if args.is_empty() { | ||
return Err(DataFusionError::Internal( | ||
|
@@ -68,19 +71,21 @@ fn array_array(args: &[ArrayRef]) -> Result<ArrayRef> { | |
} | ||
|
||
let res = match args[0].data_type() { | ||
DataType::Utf8 => array!(args, StringArray, StringBuilder), | ||
DataType::LargeUtf8 => array!(args, LargeStringArray, LargeStringBuilder), | ||
DataType::Boolean => array!(args, BooleanArray, BooleanBuilder), | ||
DataType::Float32 => array!(args, Float32Array, Float32Builder), | ||
DataType::Float64 => array!(args, Float64Array, Float64Builder), | ||
DataType::Int8 => array!(args, Int8Array, Int8Builder), | ||
DataType::Int16 => array!(args, Int16Array, Int16Builder), | ||
DataType::Int32 => array!(args, Int32Array, Int32Builder), | ||
DataType::Int64 => array!(args, Int64Array, Int64Builder), | ||
DataType::UInt8 => array!(args, UInt8Array, UInt8Builder), | ||
DataType::UInt16 => array!(args, UInt16Array, UInt16Builder), | ||
DataType::UInt32 => array!(args, UInt32Array, UInt32Builder), | ||
DataType::UInt64 => array!(args, UInt64Array, UInt64Builder), | ||
DataType::Utf8 => make_fixed_size_list!(args, StringArray, StringBuilder), | ||
DataType::LargeUtf8 => { | ||
make_fixed_size_list!(args, LargeStringArray, LargeStringBuilder) | ||
} | ||
DataType::Boolean => make_fixed_size_list!(args, BooleanArray, BooleanBuilder), | ||
DataType::Float32 => make_fixed_size_list!(args, Float32Array, Float32Builder), | ||
DataType::Float64 => make_fixed_size_list!(args, Float64Array, Float64Builder), | ||
DataType::Int8 => make_fixed_size_list!(args, Int8Array, Int8Builder), | ||
DataType::Int16 => make_fixed_size_list!(args, Int16Array, Int16Builder), | ||
DataType::Int32 => make_fixed_size_list!(args, Int32Array, Int32Builder), | ||
DataType::Int64 => make_fixed_size_list!(args, Int64Array, Int64Builder), | ||
DataType::UInt8 => make_fixed_size_list!(args, UInt8Array, UInt8Builder), | ||
DataType::UInt16 => make_fixed_size_list!(args, UInt16Array, UInt16Builder), | ||
DataType::UInt32 => make_fixed_size_list!(args, UInt32Array, UInt32Builder), | ||
DataType::UInt64 => make_fixed_size_list!(args, UInt64Array, UInt64Builder), | ||
data_type => { | ||
return Err(DataFusionError::NotImplemented(format!( | ||
"Array is not implemented for type '{:?}'.", | ||
|
@@ -92,13 +97,15 @@ fn array_array(args: &[ArrayRef]) -> Result<ArrayRef> { | |
} | ||
|
||
/// put values in an array. | ||
pub fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> { | ||
pub fn make_array(values: &[ColumnarValue]) -> Result<ColumnarValue> { | ||
let arrays: Vec<ArrayRef> = values | ||
.iter() | ||
.map(|x| match x { | ||
ColumnarValue::Array(array) => array.clone(), | ||
ColumnarValue::Scalar(scalar) => scalar.to_array().clone(), | ||
}) | ||
.collect(); | ||
Ok(ColumnarValue::Array(array_array(arrays.as_slice())?)) | ||
Ok(ColumnarValue::Array(arrays_to_fixed_size_list_array( | ||
arrays.as_slice(), | ||
)?)) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The changes in this file illustrate the change in behavior that this PR makes