-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Closed
Labels
Description
Describe the bug
Casting a struct array currently requires the source fields to appear in the same order as the fields of the requested target type. Struct field order is intended to be irrelevant, though.
To Reproduce
/// Casting a struct array to a struct type whose fields are listed in a
/// different order must succeed, producing the columns in the target order
/// with each child cast to its target type.
#[test]
fn test_cast_struct_with_different_field_order() {
    // Slow path: the source declares its children as a, b, c while the
    // target asks for c, a, b.
    let flags: ArrayRef = Arc::new(BooleanArray::from(vec![false, false, true, true]));
    let numbers: ArrayRef = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
    let words: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar", "baz", "qux"]));

    let struct_array = StructArray::from(vec![
        (Arc::new(Field::new("a", DataType::Boolean, false)), flags),
        (Arc::new(Field::new("b", DataType::Int32, false)), numbers),
        (Arc::new(Field::new("c", DataType::Utf8, false)), words),
    ]);

    // Every target field is Utf8: "c" keeps its type, "a" is converted from
    // Boolean and "b" from Int32.
    let target_fields = vec![
        Field::new("c", DataType::Utf8, false),
        Field::new("a", DataType::Utf8, false),
        Field::new("b", DataType::Utf8, false),
    ];
    let to_type = DataType::Struct(target_fields.into());

    let result = cast(&struct_array, &to_type).unwrap();
    let result_struct = result.as_struct();
    assert_eq!(result_struct.data_type(), &to_type);
    assert_eq!(result_struct.num_columns(), 3);

    // Expected contents per output position, i.e. in target order c, a, b.
    let expected_columns = [
        // "c": source position 2 -> target position 0, still Utf8.
        vec!["foo", "bar", "baz", "qux"],
        // "a": source position 0 -> target position 1, Boolean rendered as text.
        vec!["false", "false", "true", "true"],
        // "b": source position 1 -> target position 2, Int32 rendered as text.
        vec!["42", "28", "19", "31"],
    ];
    for (position, expected) in expected_columns.into_iter().enumerate() {
        let column = result_struct.column(position).as_string::<i32>();
        let actual = column.into_iter().flatten().collect::<Vec<_>>();
        assert_eq!(actual, expected);
    }
}
Expected behavior
The test above should pass: casting should happen field-wise, regardless of the order in which the fields are declared.
Additional context
I already have a patch, but opening this for tracking purposes.
alambvegarsti