Skip to content

Commit 87e69b9

Browse files
EduardoVega, Weijun-H, and alamb
authored and committed
Support dictionary data type in array_to_string (apache#10908)
* Support dictionary data type in array_to_string
* Fix import
* Some tests
* Update datafusion/functions-array/src/string.rs (Co-authored-by: Alex Huang <huangweijun1001@gmail.com>)
* Add some tests showing incorrect results
* Get logical array
* apply rust fmt
* Simplify implementation, avoid panics

---------

Co-authored-by: Alex Huang <huangweijun1001@gmail.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent da2d371 commit 87e69b9

File tree

2 files changed

+73
-4
lines changed

2 files changed

+73
-4
lines changed

datafusion/functions-array/src/string.rs

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,15 @@ use arrow::array::{
2626
use arrow::datatypes::{DataType, Field};
2727
use datafusion_expr::TypeSignature;
2828

29-
use datafusion_common::{plan_err, DataFusionError, Result};
29+
use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result};
3030

3131
use std::any::{type_name, Any};
3232

3333
use crate::utils::{downcast_arg, make_scalar_function};
34-
use arrow_schema::DataType::{FixedSizeList, LargeList, LargeUtf8, List, Null, Utf8};
34+
use arrow::compute::cast;
35+
use arrow_schema::DataType::{
36+
Dictionary, FixedSizeList, LargeList, LargeUtf8, List, Null, Utf8,
37+
};
3538
use datafusion_common::cast::{
3639
as_generic_string_array, as_large_list_array, as_list_array, as_string_array,
3740
};
@@ -76,7 +79,7 @@ macro_rules! call_array_function {
7679
DataType::UInt16 => array_function!(UInt16Array),
7780
DataType::UInt32 => array_function!(UInt32Array),
7881
DataType::UInt64 => array_function!(UInt64Array),
79-
_ => unreachable!(),
82+
dt => not_impl_err!("Unsupported data type in array_to_string: {dt}"),
8083
}
8184
};
8285
($DATATYPE:expr, $INCLUDE_LIST:expr) => {{
@@ -95,7 +98,7 @@ macro_rules! call_array_function {
9598
DataType::UInt16 => array_function!(UInt16Array),
9699
DataType::UInt32 => array_function!(UInt32Array),
97100
DataType::UInt64 => array_function!(UInt64Array),
98-
_ => unreachable!(),
101+
dt => not_impl_err!("Unsupported data type in array_to_string: {dt}"),
99102
}
100103
}};
101104
}
@@ -245,6 +248,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
245248
with_null_string = true;
246249
}
247250

251+
/// Creates a single string from single element of a ListArray (which is
252+
/// itself another Array)
248253
fn compute_array_to_string(
249254
arg: &mut String,
250255
arr: ArrayRef,
@@ -281,6 +286,22 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
281286

282287
Ok(arg)
283288
}
289+
Dictionary(_key_type, value_type) => {
290+
// Call cast to unwrap the dictionary. This could be optimized if we wanted
291+
// to accept the overhead of extra code
292+
let values = cast(&arr, value_type.as_ref()).map_err(|e| {
293+
DataFusionError::from(e).context(
294+
"Casting dictionary to values in compute_array_to_string",
295+
)
296+
})?;
297+
compute_array_to_string(
298+
arg,
299+
values,
300+
delimiter,
301+
null_string,
302+
with_null_string,
303+
)
304+
}
284305
Null => Ok(arg),
285306
data_type => {
286307
macro_rules! array_function {

datafusion/sqllogictest/test_files/array.slt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3769,6 +3769,54 @@ select array_to_string(make_array(), ',')
37693769
----
37703770
(empty)
37713771

3772+
# array to string dictionary
3773+
statement ok
3774+
CREATE TABLE table1 AS VALUES
3775+
(1, 'foo'),
3776+
(3, 'bar'),
3777+
(1, 'foo'),
3778+
(2, NULL),
3779+
(NULL, 'baz')
3780+
;
3781+
3782+
# expect 1-3-1-2 (dictionary values should be repeated)
3783+
query T
3784+
SELECT array_to_string(array_agg(column1),'-')
3785+
FROM (
3786+
SELECT arrow_cast(column1, 'Dictionary(Int32, Int32)') as column1
3787+
FROM table1
3788+
);
3789+
----
3790+
1-3-1-2
3791+
3792+
# expect foo,bar,foo,baz (dictionary values should be repeated)
3793+
query T
3794+
SELECT array_to_string(array_agg(column2),',')
3795+
FROM (
3796+
SELECT arrow_cast(column2, 'Dictionary(Int64, Utf8)') as column2
3797+
FROM table1
3798+
);
3799+
----
3800+
foo,bar,foo,baz
3801+
3802+
# Expect only values that are in the group
3803+
query I?T
3804+
SELECT column1, array_agg(column2), array_to_string(array_agg(column2),',')
3805+
FROM (
3806+
SELECT column1, arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2
3807+
FROM table1
3808+
)
3809+
GROUP BY column1
3810+
ORDER BY column1;
3811+
----
3812+
1 [foo, foo] foo,foo
3813+
2 [] (empty)
3814+
3 [bar] bar
3815+
NULL [baz] baz
3816+
3817+
statement ok
3818+
drop table table1;
3819+
37723820

37733821
## array_union (aliases: `list_union`)
37743822

0 commit comments

Comments
 (0)