diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 8fd464fbd6aac..95e60769c8507 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -47,6 +47,7 @@ use arrow::{ }; use arrow_array::types::ArrowTimestampType; use arrow_array::{ArrowNativeTypeOp, Scalar}; +use arrow_buffer::Buffer; /// A dynamically typed, nullable single value, (the single-valued counter-part /// to arrow's [`Array`]) @@ -1389,25 +1390,32 @@ impl ScalarValue { if first_struct.is_none() { return _internal_err!( "Inconsistent types in ScalarValue::iter_to_array. \ - Expected ScalarValue::Struct, got {:?}", arrays[0].clone() + Expected ScalarValue::Struct, got {:?}", + arrays[0].clone() ); } + + let mut valid = BooleanBufferBuilder::new(arrays.len()); + let first_struct = first_struct.unwrap(); - let column_fields = first_struct.fields().to_vec(); - let mut column_values: Vec> = vec![Vec::with_capacity(arrays.len()); first_struct.num_columns()]; - for i in 0..first_struct.num_columns() { - let arr = first_struct.column(i).to_owned(); - let sv = ScalarValue::try_from_array(&arr, 0)?; + valid.append(first_struct.is_valid(0)); + + let mut column_values: Vec> = + vec![Vec::with_capacity(arrays.len()); first_struct.num_columns()]; + + for (i, v) in first_struct.columns().iter().enumerate() { + // ScalarValue::Struct contains a single element in each column. + let sv = ScalarValue::try_from_array(v, 0)?; column_values[i].push(sv); } for arr in arrays.iter().skip(1) { if let Some(struct_array) = arr.as_struct_opt() { - // Since it is not possible to have `Null` in ScalarValue::Struct, - // And, ScalarValue::Struct contains a single element StructArray. - for i in 0..struct_array.num_columns() { - let arr = struct_array.column(i).to_owned(); - let sv = ScalarValue::try_from_array(&arr, 0)?; + valid.append(struct_array.is_valid(0)); + + for (i, v) in struct_array.columns().iter().enumerate() { + // ScalarValue::Struct contains a single element in each column. + let sv = ScalarValue::try_from_array(v, 0)?; column_values[i].push(sv); } } else { @@ -1417,19 +1425,26 @@ impl ScalarValue { ); } } - println!("column_arrays: {:?}", column_values); - println!("column_fields: {:?}", column_fields); + + let column_fields = first_struct.fields().to_vec(); let mut data = vec![]; - for (field, values) in column_fields.into_iter().zip(column_values.into_iter()) { + for (field, values) in + column_fields.into_iter().zip(column_values.into_iter()) + { let field = field.to_owned(); let array = ScalarValue::iter_to_array(values.into_iter())?; data.push((field, array)); } - println!("data: {:?}", data); + let bool_buffer = valid.finish(); + let buffer: Buffer = bool_buffer.values().into(); + println!("bool_buffer: {:?}", bool_buffer); + // println!("bool_buffer v: {:?}", bool_buffer.values()); + + // println!("data: {:?}", data); - Ok(Arc::new(StructArray::from(data))) + Ok(Arc::new(StructArray::from((data, buffer)))) } fn build_list_array( @@ -1539,9 +1554,7 @@ impl ScalarValue { build_array_primitive!(IntervalMonthDayNanoArray, IntervalMonthDayNano) } DataType::Struct(_) => build_struct_array(scalars)?, - DataType::List(_) | DataType::LargeList(_) => { - build_list_array(scalars)? - } + DataType::List(_) | DataType::LargeList(_) => build_list_array(scalars)?, DataType::Dictionary(key_type, value_type) => { // create the values array let value_scalars = scalars @@ -3139,6 +3152,7 @@ mod tests { use std::cmp::Ordering; use std::sync::Arc; + use arrow_buffer::Buffer; use chrono::NaiveDate; use rand::Rng; @@ -3325,23 +3339,80 @@ mod tests { #[test] fn test_iter_to_array_struct() { - let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true])); - let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31])); + let s1 = StructArray::from(vec![ + ( + Arc::new(Field::new("A", DataType::Boolean, false)), + Arc::new(BooleanArray::from(vec![false])) as ArrayRef, + ), + ( + Arc::new(Field::new("B", DataType::Int32, false)), + Arc::new(Int32Array::from(vec![42])) as ArrayRef, + ), + ]); - let s1 = ( - Arc::new(Field::new("b", DataType::Boolean, false)), - boolean.clone() as ArrayRef - ); + let s2 = StructArray::from(vec![ + ( + Arc::new(Field::new("A", DataType::Boolean, false)), + Arc::new(BooleanArray::from(vec![false])) as ArrayRef, + ), + ( + Arc::new(Field::new("B", DataType::Int32, false)), + Arc::new(Int32Array::from(vec![42])) as ArrayRef, + ), + ]); - let s2 = ( - Arc::new(Field::new("c", DataType::Int32, false)), - int.clone() as ArrayRef - ); + let scalars = vec![ + ScalarValue::Struct(Arc::new(s1)), + ScalarValue::Struct(Arc::new(s2)), + ]; + + let array = ScalarValue::iter_to_array(scalars).unwrap(); + + let expected = StructArray::from(vec![ + ( + Arc::new(Field::new("A", DataType::Boolean, false)), + Arc::new(BooleanArray::from(vec![false, false])) as ArrayRef, + ), + ( + Arc::new(Field::new("B", DataType::Int32, false)), + Arc::new(Int32Array::from(vec![42, 42])) as ArrayRef, + ), + ]); + assert_eq!(array.as_ref(), &expected); + } - let expected = StructArray::from(vec![s1.clone(), s2.clone()]); + #[test] + fn test_iter_to_array_struct_with_nulls() { + // non-null + let s1 = StructArray::from(( + vec![ + ( + Arc::new(Field::new("A", DataType::Int32, false)), + Arc::new(Int32Array::from(vec![1])) as ArrayRef, + ), + ( + Arc::new(Field::new("B", DataType::Int64, false)), + Arc::new(Int64Array::from(vec![2])) as ArrayRef, + ), + ], + // Present the null mask, 1 is non-null, 0 is null + Buffer::from(&[1]), + )); - let s1 = StructArray::from(vec![s1]); - let s2 = StructArray::from(vec![s2]); + // null + let s2 = StructArray::from(( + vec![ + ( + Arc::new(Field::new("A", DataType::Int32, false)), + Arc::new(Int32Array::from(vec![3])) as ArrayRef, + ), + ( + Arc::new(Field::new("B", DataType::Int64, false)), + Arc::new(Int64Array::from(vec![4])) as ArrayRef, + ), + ], + Buffer::from(&[0]), + )); let scalars = vec![ ScalarValue::Struct(Arc::new(s1)), @@ -3349,7 +3420,9 @@ mod tests { ]; let array = ScalarValue::iter_to_array(scalars).unwrap(); - assert_eq!(array.as_ref(), &expected); + let struct_array = array.as_struct(); + assert!(struct_array.is_valid(0)); + assert!(struct_array.is_null(1)); } #[test] @@ -4677,7 +4750,7 @@ mod tests { // iter_to_array for struct scalars let array = ScalarValue::iter_to_array(vec![s0.clone(), s1.clone(), s2.clone()]).unwrap(); - + println!("array: {array:?}"); let array = as_struct_array(&array).unwrap();