@@ -293,6 +293,155 @@ impl ScalarValue {
293293 self . to_array_of_size ( 1 )
294294 }
295295
296+ /// Converts an iterator of references [`ScalarValue`] into an [`ArrayRef`]
297+ /// corresponding to those values. For example,
298+ ///
299+ /// Returns an error if the iterator is empty or if the
300+ /// [`ScalarValue`]s are not all the same type
301+ ///
302+ /// Example
303+ /// ```
304+ /// use datafusion::scalar::ScalarValue;
305+ /// use arrow::array::{ArrayRef, BooleanArray};
306+ ///
307+ /// let scalars = vec![
308+ /// ScalarValue::Boolean(Some(true)),
309+ /// ScalarValue::Boolean(None),
310+ /// ScalarValue::Boolean(Some(false)),
311+ /// ];
312+ ///
313+ /// // Build an Array from the list of ScalarValues
314+ /// let array = ScalarValue::iter_to_array(scalars.iter())
315+ /// .unwrap();
316+ ///
317+ /// let expected: ArrayRef = std::sync::Arc::new(
318+ /// BooleanArray::from(vec![
319+ /// Some(true),
320+ /// None,
321+ /// Some(false)
322+ /// ]
323+ /// ));
324+ ///
325+ /// assert_eq!(&array, &expected);
326+ /// ```
327+ pub fn iter_to_array < ' a > (
328+ scalars : impl IntoIterator < Item = & ' a ScalarValue > ,
329+ ) -> Result < ArrayRef > {
330+ let mut scalars = scalars. into_iter ( ) . peekable ( ) ;
331+
332+ // figure out the type based on the first element
333+ let data_type = match scalars. peek ( ) {
334+ None => {
335+ return Err ( DataFusionError :: Internal (
336+ "Empty iterator passed to ScalarValue::iter_to_array" . to_string ( ) ,
337+ ) )
338+ }
339+ Some ( sv) => sv. get_datatype ( ) ,
340+ } ;
341+
342+ /// Creates an array of $ARRAY_TY by unpacking values of
343+ /// SCALAR_TY for primitive types
344+ macro_rules! build_array_primitive {
345+ ( $ARRAY_TY: ident, $SCALAR_TY: ident) => { {
346+ {
347+ let values = scalars
348+ . map( |sv| {
349+ if let ScalarValue :: $SCALAR_TY( v) = sv {
350+ Ok ( * v)
351+ } else {
352+ Err ( DataFusionError :: Internal ( format!(
353+ "Inconsistent types in ScalarValue::iter_to_array. \
354+ Expected {:?}, got {:?}",
355+ data_type, sv
356+ ) ) )
357+ }
358+ } )
359+ . collect:: <Result <Vec <_>>>( ) ?;
360+
361+ let array: $ARRAY_TY = values. iter( ) . collect( ) ;
362+ Arc :: new( array)
363+ }
364+ } } ;
365+ }
366+
367+ /// Creates an array of $ARRAY_TY by unpacking values of
368+ /// SCALAR_TY for "string-like" types.
369+ macro_rules! build_array_string {
370+ ( $ARRAY_TY: ident, $SCALAR_TY: ident) => { {
371+ {
372+ let values = scalars
373+ . map( |sv| {
374+ if let ScalarValue :: $SCALAR_TY( v) = sv {
375+ Ok ( v)
376+ } else {
377+ Err ( DataFusionError :: Internal ( format!(
378+ "Inconsistent types in ScalarValue::iter_to_array. \
379+ Expected {:?}, got {:?}",
380+ data_type, sv
381+ ) ) )
382+ }
383+ } )
384+ . collect:: <Result <Vec <_>>>( ) ?;
385+
386+ // it is annoying that one can not create
387+ // StringArray et al directly from iter of &String,
388+ // requiring this map to &str
389+ let values = values. iter( ) . map( |s| s. as_ref( ) ) ;
390+
391+ let array: $ARRAY_TY = values. collect( ) ;
392+ Arc :: new( array)
393+ }
394+ } } ;
395+ }
396+
397+ let array: ArrayRef = match & data_type {
398+ DataType :: Boolean => build_array_primitive ! ( BooleanArray , Boolean ) ,
399+ DataType :: Float32 => build_array_primitive ! ( Float32Array , Float32 ) ,
400+ DataType :: Float64 => build_array_primitive ! ( Float64Array , Float64 ) ,
401+ DataType :: Int8 => build_array_primitive ! ( Int8Array , Int8 ) ,
402+ DataType :: Int16 => build_array_primitive ! ( Int16Array , Int16 ) ,
403+ DataType :: Int32 => build_array_primitive ! ( Int32Array , Int32 ) ,
404+ DataType :: Int64 => build_array_primitive ! ( Int64Array , Int64 ) ,
405+ DataType :: UInt8 => build_array_primitive ! ( UInt8Array , UInt8 ) ,
406+ DataType :: UInt16 => build_array_primitive ! ( UInt16Array , UInt16 ) ,
407+ DataType :: UInt32 => build_array_primitive ! ( UInt32Array , UInt32 ) ,
408+ DataType :: UInt64 => build_array_primitive ! ( UInt64Array , UInt64 ) ,
409+ DataType :: Utf8 => build_array_string ! ( StringArray , Utf8 ) ,
410+ DataType :: LargeUtf8 => build_array_string ! ( LargeStringArray , LargeUtf8 ) ,
411+ DataType :: Binary => build_array_string ! ( BinaryArray , Binary ) ,
412+ DataType :: LargeBinary => build_array_string ! ( LargeBinaryArray , LargeBinary ) ,
413+ DataType :: Date32 => build_array_primitive ! ( Date32Array , Date32 ) ,
414+ DataType :: Date64 => build_array_primitive ! ( Date64Array , Date64 ) ,
415+ DataType :: Timestamp ( TimeUnit :: Second , None ) => {
416+ build_array_primitive ! ( TimestampSecondArray , TimestampSecond )
417+ }
418+ DataType :: Timestamp ( TimeUnit :: Millisecond , None ) => {
419+ build_array_primitive ! ( TimestampMillisecondArray , TimestampMillisecond )
420+ }
421+ DataType :: Timestamp ( TimeUnit :: Microsecond , None ) => {
422+ build_array_primitive ! ( TimestampMicrosecondArray , TimestampMicrosecond )
423+ }
424+ DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) => {
425+ build_array_primitive ! ( TimestampNanosecondArray , TimestampNanosecond )
426+ }
427+ DataType :: Interval ( IntervalUnit :: DayTime ) => {
428+ build_array_primitive ! ( IntervalDayTimeArray , IntervalDayTime )
429+ }
430+ DataType :: Interval ( IntervalUnit :: YearMonth ) => {
431+ build_array_primitive ! ( IntervalYearMonthArray , IntervalYearMonth )
432+ }
433+ _ => {
434+ return Err ( DataFusionError :: Internal ( format ! (
435+ "Unsupported creation of {:?} array from ScalarValue {:?}" ,
436+ data_type,
437+ scalars. peek( )
438+ ) ) )
439+ }
440+ } ;
441+
442+ Ok ( array)
443+ }
444+
296445 /// Converts a scalar value into an array of `size` rows.
297446 pub fn to_array_of_size ( & self , size : usize ) -> ArrayRef {
298447 match self {
@@ -609,6 +758,12 @@ impl From<u64> for ScalarValue {
609758 }
610759}
611760
761+ impl From < & str > for ScalarValue {
762+ fn from ( value : & str ) -> Self {
763+ ScalarValue :: Utf8 ( Some ( value. to_string ( ) ) )
764+ }
765+ }
766+
612767macro_rules! impl_try_from {
613768 ( $SCALAR: ident, $NATIVE: ident) => {
614769 impl TryFrom <ScalarValue > for $NATIVE {
@@ -940,4 +1095,139 @@ mod tests {
9401095 assert ! ( prim_array. is_null( 1 ) ) ;
9411096 assert_eq ! ( prim_array. value( 2 ) , 101 ) ;
9421097 }
1098+
1099+ /// Creates array directly and via ScalarValue and ensures they are the same
1100+ macro_rules! check_scalar_iter {
1101+ ( $SCALAR_T: ident, $ARRAYTYPE: ident, $INPUT: expr) => { {
1102+ let scalars: Vec <_> =
1103+ $INPUT. iter( ) . map( |v| ScalarValue :: $SCALAR_T( * v) ) . collect( ) ;
1104+
1105+ let array = ScalarValue :: iter_to_array( scalars. iter( ) ) . unwrap( ) ;
1106+
1107+ let expected: ArrayRef = Arc :: new( $ARRAYTYPE:: from( $INPUT) ) ;
1108+
1109+ assert_eq!( & array, & expected) ;
1110+ } } ;
1111+ }
1112+
1113+ /// Creates array directly and via ScalarValue and ensures they
1114+ /// are the same, for string arrays
1115+ macro_rules! check_scalar_iter_string {
1116+ ( $SCALAR_T: ident, $ARRAYTYPE: ident, $INPUT: expr) => { {
1117+ let scalars: Vec <_> = $INPUT
1118+ . iter( )
1119+ . map( |v| ScalarValue :: $SCALAR_T( v. map( |v| v. to_string( ) ) ) )
1120+ . collect( ) ;
1121+
1122+ let array = ScalarValue :: iter_to_array( scalars. iter( ) ) . unwrap( ) ;
1123+
1124+ let expected: ArrayRef = Arc :: new( $ARRAYTYPE:: from( $INPUT) ) ;
1125+
1126+ assert_eq!( & array, & expected) ;
1127+ } } ;
1128+ }
1129+
1130+ /// Creates array directly and via ScalarValue and ensures they
1131+ /// are the same, for binary arrays
1132+ macro_rules! check_scalar_iter_binary {
1133+ ( $SCALAR_T: ident, $ARRAYTYPE: ident, $INPUT: expr) => { {
1134+ let scalars: Vec <_> = $INPUT
1135+ . iter( )
1136+ . map( |v| ScalarValue :: $SCALAR_T( v. map( |v| v. to_vec( ) ) ) )
1137+ . collect( ) ;
1138+
1139+ let array = ScalarValue :: iter_to_array( scalars. iter( ) ) . unwrap( ) ;
1140+
1141+ let expected: $ARRAYTYPE =
1142+ $INPUT. iter( ) . map( |v| v. map( |v| v. to_vec( ) ) ) . collect( ) ;
1143+
1144+ let expected: ArrayRef = Arc :: new( expected) ;
1145+
1146+ assert_eq!( & array, & expected) ;
1147+ } } ;
1148+ }
1149+
1150+ #[ test]
1151+ fn scalar_iter_to_array_boolean ( ) {
1152+ check_scalar_iter ! ( Boolean , BooleanArray , vec![ Some ( true ) , None , Some ( false ) ] ) ;
1153+ check_scalar_iter ! ( Float32 , Float32Array , vec![ Some ( 1.9 ) , None , Some ( -2.1 ) ] ) ;
1154+ check_scalar_iter ! ( Float64 , Float64Array , vec![ Some ( 1.9 ) , None , Some ( -2.1 ) ] ) ;
1155+
1156+ check_scalar_iter ! ( Int8 , Int8Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1157+ check_scalar_iter ! ( Int16 , Int16Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1158+ check_scalar_iter ! ( Int32 , Int32Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1159+ check_scalar_iter ! ( Int64 , Int64Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1160+
1161+ check_scalar_iter ! ( UInt8 , UInt8Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1162+ check_scalar_iter ! ( UInt16 , UInt16Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1163+ check_scalar_iter ! ( UInt32 , UInt32Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1164+ check_scalar_iter ! ( UInt64 , UInt64Array , vec![ Some ( 1 ) , None , Some ( 3 ) ] ) ;
1165+
1166+ check_scalar_iter ! (
1167+ TimestampSecond ,
1168+ TimestampSecondArray ,
1169+ vec![ Some ( 1 ) , None , Some ( 3 ) ]
1170+ ) ;
1171+ check_scalar_iter ! (
1172+ TimestampMillisecond ,
1173+ TimestampMillisecondArray ,
1174+ vec![ Some ( 1 ) , None , Some ( 3 ) ]
1175+ ) ;
1176+ check_scalar_iter ! (
1177+ TimestampMicrosecond ,
1178+ TimestampMicrosecondArray ,
1179+ vec![ Some ( 1 ) , None , Some ( 3 ) ]
1180+ ) ;
1181+ check_scalar_iter ! (
1182+ TimestampNanosecond ,
1183+ TimestampNanosecondArray ,
1184+ vec![ Some ( 1 ) , None , Some ( 3 ) ]
1185+ ) ;
1186+
1187+ check_scalar_iter_string ! (
1188+ Utf8 ,
1189+ StringArray ,
1190+ vec![ Some ( "foo" ) , None , Some ( "bar" ) ]
1191+ ) ;
1192+ check_scalar_iter_string ! (
1193+ LargeUtf8 ,
1194+ LargeStringArray ,
1195+ vec![ Some ( "foo" ) , None , Some ( "bar" ) ]
1196+ ) ;
1197+ check_scalar_iter_binary ! (
1198+ Binary ,
1199+ BinaryArray ,
1200+ vec![ Some ( b"foo" ) , None , Some ( b"bar" ) ]
1201+ ) ;
1202+ check_scalar_iter_binary ! (
1203+ LargeBinary ,
1204+ LargeBinaryArray ,
1205+ vec![ Some ( b"foo" ) , None , Some ( b"bar" ) ]
1206+ ) ;
1207+ }
1208+
1209+ #[ test]
1210+ fn scalar_iter_to_array_empty ( ) {
1211+ let scalars = vec ! [ ] as Vec < ScalarValue > ;
1212+
1213+ let result = ScalarValue :: iter_to_array ( scalars. iter ( ) ) . unwrap_err ( ) ;
1214+ assert ! (
1215+ result
1216+ . to_string( )
1217+ . contains( "Empty iterator passed to ScalarValue::iter_to_array" ) ,
1218+ "{}" ,
1219+ result
1220+ ) ;
1221+ }
1222+
1223+ #[ test]
1224+ fn scalar_iter_to_array_mismatched_types ( ) {
1225+ use ScalarValue :: * ;
1226+ // If the scalar values are not all the correct type, error here
1227+ let scalars: Vec < ScalarValue > = vec ! [ Boolean ( Some ( true ) ) , Int32 ( Some ( 5 ) ) ] ;
1228+
1229+ let result = ScalarValue :: iter_to_array ( scalars. iter ( ) ) . unwrap_err ( ) ;
1230+ assert ! ( result. to_string( ) . contains( "Inconsistent types in ScalarValue::iter_to_array. Expected Boolean, got Int32(5)" ) ,
1231+ "{}" , result) ;
1232+ }
9431233}
0 commit comments