|
15 | 15 | // specific language governing permissions and limitations |
16 | 16 | // under the License. |
17 | 17 |
|
18 | | -use crate::error::{DataFusionError, Result}; |
| 18 | +use crate::error::{DataFusionError, Result, _plan_err}; |
19 | 19 | use arrow::{ |
20 | 20 | array::{new_null_array, Array, ArrayRef, StructArray}, |
21 | 21 | compute::cast, |
22 | | - datatypes::{DataType::Struct, Field}, |
| 22 | + datatypes::{DataType::Struct, Field, FieldRef}, |
23 | 23 | }; |
24 | 24 | use std::sync::Arc; |
25 | 25 |
|
@@ -121,6 +121,81 @@ pub fn cast_column(source_col: &ArrayRef, target_field: &Field) -> Result<ArrayR |
121 | 121 | } |
122 | 122 | } |
123 | 123 |
|
| 124 | +/// Validates compatibility between source and target struct fields for casting operations. |
| 125 | +/// |
| 126 | +/// This function implements comprehensive struct compatibility checking by examining: |
| 127 | +/// - Field name matching between source and target structs |
| 128 | +/// - Type castability for each matching field (including recursive struct validation) |
| 129 | +/// - Proper handling of missing fields (target fields not in source are allowed - filled with nulls) |
| 130 | +/// - Proper handling of extra fields (source fields not in target are allowed - ignored) |
| 131 | +/// |
| 132 | +/// # Compatibility Rules |
| 133 | +/// - **Field Matching**: Fields are matched by name (case-sensitive) |
| 134 | +/// - **Missing Target Fields**: Allowed - will be filled with null values during casting |
| 135 | +/// - **Extra Source Fields**: Allowed - will be ignored during casting |
| 136 | +/// - **Type Compatibility**: Each matching field must be castable using Arrow's type system |
| 137 | +/// - **Nested Structs**: Recursively validates nested struct compatibility |
| 138 | +/// |
| 139 | +/// # Arguments |
| 140 | +/// * `source_fields` - Fields from the source struct type |
| 141 | +/// * `target_fields` - Fields from the target struct type |
| 142 | +/// |
| 143 | +/// # Returns |
| 144 | +/// * `Ok(true)` if the structs are compatible for casting |
| 145 | +/// * `Err(DataFusionError)` with detailed error message if incompatible |
| 146 | +/// |
| 147 | +/// # Examples |
| 148 | +/// ```ignore |
| 149 | +/// // Compatible: source has extra field, target has missing field |
| 150 | +/// // Source: {a: i32, b: string, c: f64} |
| 151 | +/// // Target: {a: i64, d: bool} |
| 152 | +/// // Result: Ok(true) - 'a' can cast i32->i64, 'b','c' ignored, 'd' filled with nulls |
| 153 | +/// |
| 154 | +/// // Incompatible: matching field has incompatible types |
| 155 | +/// // Source: {a: string} |
| 156 | +/// // Target: {a: binary} |
| 157 | +/// // Result: Err(...) - string cannot cast to binary |
| 158 | +/// ``` |
| 159 | +pub fn validate_struct_compatibility( |
| 160 | + source_fields: &[FieldRef], |
| 161 | + target_fields: &[FieldRef], |
| 162 | +) -> Result<bool> { |
| 163 | + // Check compatibility for each target field |
| 164 | + for target_field in target_fields { |
| 165 | + // Look for matching field in source by name |
| 166 | + if let Some(source_field) = source_fields |
| 167 | + .iter() |
| 168 | + .find(|f| f.name() == target_field.name()) |
| 169 | + { |
| 170 | + // Check if the matching field types are compatible |
| 171 | + match (source_field.data_type(), target_field.data_type()) { |
| 172 | + // Recursively validate nested structs |
| 173 | + (Struct(source_nested), Struct(target_nested)) => { |
| 174 | + validate_struct_compatibility(source_nested, target_nested)?; |
| 175 | + } |
| 176 | + // For non-struct types, use the existing castability check |
| 177 | + _ => { |
| 178 | + if !arrow::compute::can_cast_types( |
| 179 | + source_field.data_type(), |
| 180 | + target_field.data_type(), |
| 181 | + ) { |
| 182 | + return _plan_err!( |
| 183 | + "Cannot cast struct field '{}' from type {:?} to type {:?}", |
| 184 | + target_field.name(), |
| 185 | + source_field.data_type(), |
| 186 | + target_field.data_type() |
| 187 | + ); |
| 188 | + } |
| 189 | + } |
| 190 | + } |
| 191 | + } |
| 192 | + // Missing fields in source are OK - they'll be filled with nulls |
| 193 | + } |
| 194 | + |
| 195 | + // Extra fields in source are OK - they'll be ignored |
| 196 | + Ok(true) |
| 197 | +} |
| 198 | + |
124 | 199 | #[cfg(test)] |
125 | 200 | mod tests { |
126 | 201 | use super::*; |
@@ -202,4 +277,53 @@ mod tests { |
202 | 277 | assert!(error_msg.contains("to struct type")); |
203 | 278 | assert!(error_msg.contains("Source must be a struct")); |
204 | 279 | } |
| 280 | + |
#[test]
fn test_validate_struct_compatibility_incompatible_types() {
    // Source {field1: Binary, field2: Utf8} against target {field1: Int32}:
    // the shared field `field1` has no Binary -> Int32 cast, so validation
    // must fail and the message must name the field and both types.
    let source = [
        Arc::new(Field::new("field1", DataType::Binary, true)),
        Arc::new(Field::new("field2", DataType::Utf8, true)),
    ];
    let target = [Arc::new(Field::new("field1", DataType::Int32, true))];

    let message = validate_struct_compatibility(&source, &target)
        .expect_err("Binary -> Int32 must be rejected")
        .to_string();

    for expected in ["Cannot cast struct field 'field1'", "Binary", "Int32"] {
        assert!(
            message.contains(expected),
            "error message {message:?} does not mention {expected:?}"
        );
    }
}
| 299 | + |
#[test]
fn test_validate_struct_compatibility_compatible_types() {
    // Source {field1: Int32, field2: Utf8} against target {field1: Int64}:
    // Int32 widens to Int64 and the extra source field `field2` is ignored.
    let source = [
        Arc::new(Field::new("field1", DataType::Int32, true)),
        Arc::new(Field::new("field2", DataType::Utf8, true)),
    ];
    let target = [Arc::new(Field::new("field1", DataType::Int64, true))];

    let outcome = validate_struct_compatibility(&source, &target);

    assert!(matches!(outcome, Ok(true)));
}
| 315 | + |
#[test]
fn test_validate_struct_compatibility_missing_field_in_source() {
    // Source {field2: Utf8} has no `field1`, which the target {field1: Int32}
    // asks for. The target field is nullable, so the gap is acceptable: it
    // will be filled with nulls.
    let source = [Arc::new(Field::new("field2", DataType::Utf8, true))];
    let target = [Arc::new(Field::new("field1", DataType::Int32, true))];

    let compatible = validate_struct_compatibility(&source, &target)
        .expect("a nullable target field missing from the source is allowed");

    assert!(compatible);
}
205 | 329 | } |
0 commit comments