Skip to content

Commit 2bc8256

Browse files
committed
feat: move and make validate_struct_compatibility public for nested struct casting
1 parent 96f97b7 commit 2bc8256

File tree

2 files changed

+129
-135
lines changed

2 files changed

+129
-135
lines changed

datafusion/common/src/nested_struct.rs

Lines changed: 126 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::error::{DataFusionError, Result};
18+
use crate::error::{DataFusionError, Result, _plan_err};
1919
use arrow::{
2020
array::{new_null_array, Array, ArrayRef, StructArray},
2121
compute::cast,
22-
datatypes::{DataType::Struct, Field},
22+
datatypes::{DataType::Struct, Field, FieldRef},
2323
};
2424
use std::sync::Arc;
2525

@@ -121,6 +121,81 @@ pub fn cast_column(source_col: &ArrayRef, target_field: &Field) -> Result<ArrayR
121121
}
122122
}
123123

124+
/// Validates compatibility between source and target struct fields for casting operations.
125+
///
126+
/// This function implements comprehensive struct compatibility checking by examining:
127+
/// - Field name matching between source and target structs
128+
/// - Type castability for each matching field (including recursive struct validation)
129+
/// - Proper handling of missing fields (target fields not in source are allowed - filled with nulls)
130+
/// - Proper handling of extra fields (source fields not in target are allowed - ignored)
131+
///
132+
/// # Compatibility Rules
133+
/// - **Field Matching**: Fields are matched by name (case-sensitive)
134+
/// - **Target Fields Missing from Source**: Allowed - will be filled with null values during casting
135+
/// - **Extra Source Fields**: Allowed - will be ignored during casting
136+
/// - **Type Compatibility**: Each matching field must be castable using Arrow's type system
137+
/// - **Nested Structs**: Recursively validates nested struct compatibility
138+
///
139+
/// # Arguments
140+
/// * `source_fields` - Fields from the source struct type
141+
/// * `target_fields` - Fields from the target struct type
142+
///
143+
/// # Returns
144+
/// * `Ok(true)` if the structs are compatible for casting
145+
/// * `Err(DataFusionError)` with detailed error message if incompatible
146+
///
147+
/// # Examples
148+
/// ```ignore
149+
/// // Compatible: source has extra field, target has missing field
150+
/// // Source: {a: i32, b: string, c: f64}
151+
/// // Target: {a: i64, d: bool}
152+
/// // Result: Ok(true) - 'a' can cast i32->i64, 'b','c' ignored, 'd' filled with nulls
153+
///
154+
/// // Incompatible: matching field has incompatible types
155+
/// // Source: {a: binary}
156+
/// // Target: {a: i32}
157+
/// // Result: Err(...) - binary cannot cast to i32
158+
/// ```
159+
pub fn validate_struct_compatibility(
160+
source_fields: &[FieldRef],
161+
target_fields: &[FieldRef],
162+
) -> Result<bool> {
163+
// Check compatibility for each target field
164+
for target_field in target_fields {
165+
// Look for matching field in source by name
166+
if let Some(source_field) = source_fields
167+
.iter()
168+
.find(|f| f.name() == target_field.name())
169+
{
170+
// Check if the matching field types are compatible
171+
match (source_field.data_type(), target_field.data_type()) {
172+
// Recursively validate nested structs
173+
(Struct(source_nested), Struct(target_nested)) => {
174+
validate_struct_compatibility(source_nested, target_nested)?;
175+
}
176+
// For non-struct types, use the existing castability check
177+
_ => {
178+
if !arrow::compute::can_cast_types(
179+
source_field.data_type(),
180+
target_field.data_type(),
181+
) {
182+
return _plan_err!(
183+
"Cannot cast struct field '{}' from type {:?} to type {:?}",
184+
target_field.name(),
185+
source_field.data_type(),
186+
target_field.data_type()
187+
);
188+
}
189+
}
190+
}
191+
}
192+
// Missing fields in source are OK - they'll be filled with nulls
193+
}
194+
195+
// Extra fields in source are OK - they'll be ignored
196+
Ok(true)
197+
}
198+
124199
#[cfg(test)]
125200
mod tests {
126201
use super::*;
@@ -202,4 +277,53 @@ mod tests {
202277
assert!(error_msg.contains("to struct type"));
203278
assert!(error_msg.contains("Source must be a struct"));
204279
}
280+
281+
#[test]
282+
fn test_validate_struct_compatibility_incompatible_types() {
283+
// Source struct: {field1: Binary, field2: String}
284+
let source_fields = vec![
285+
Arc::new(Field::new("field1", DataType::Binary, true)),
286+
Arc::new(Field::new("field2", DataType::Utf8, true)),
287+
];
288+
289+
// Target struct: {field1: Int32}
290+
let target_fields = vec![Arc::new(Field::new("field1", DataType::Int32, true))];
291+
292+
let result = validate_struct_compatibility(&source_fields, &target_fields);
293+
assert!(result.is_err());
294+
let error_msg = result.unwrap_err().to_string();
295+
assert!(error_msg.contains("Cannot cast struct field 'field1'"));
296+
assert!(error_msg.contains("Binary"));
297+
assert!(error_msg.contains("Int32"));
298+
}
299+
300+
#[test]
301+
fn test_validate_struct_compatibility_compatible_types() {
302+
// Source struct: {field1: Int32, field2: String}
303+
let source_fields = vec![
304+
Arc::new(Field::new("field1", DataType::Int32, true)),
305+
Arc::new(Field::new("field2", DataType::Utf8, true)),
306+
];
307+
308+
// Target struct: {field1: Int64} (Int32 can cast to Int64)
309+
let target_fields = vec![Arc::new(Field::new("field1", DataType::Int64, true))];
310+
311+
let result = validate_struct_compatibility(&source_fields, &target_fields);
312+
assert!(result.is_ok());
313+
assert!(result.unwrap());
314+
}
315+
316+
#[test]
317+
fn test_validate_struct_compatibility_missing_field_in_source() {
318+
// Source struct: {field2: String} (missing field1)
319+
let source_fields = vec![Arc::new(Field::new("field2", DataType::Utf8, true))];
320+
321+
// Target struct: {field1: Int32}
322+
let target_fields = vec![Arc::new(Field::new("field1", DataType::Int32, true))];
323+
324+
// Should be OK - missing fields will be filled with nulls
325+
let result = validate_struct_compatibility(&source_fields, &target_fields);
326+
assert!(result.is_ok());
327+
assert!(result.unwrap());
328+
}
205329
}

datafusion/datasource/src/nested_schema_adapter/adapter.rs

Lines changed: 3 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ use crate::schema_adapter::{
2020
};
2121
use arrow::{
2222
array::ArrayRef,
23-
datatypes::{DataType::Struct, Field, FieldRef, Schema, SchemaRef},
23+
datatypes::{DataType::Struct, Field, Schema, SchemaRef},
2424
};
25-
use datafusion_common::nested_struct::cast_column;
26-
use datafusion_common::{plan_err, Result};
25+
use datafusion_common::nested_struct::{cast_column, validate_struct_compatibility};
26+
use datafusion_common::Result;
2727
use std::sync::Arc;
2828

2929
/// A SchemaAdapter that handles schema evolution for nested struct types
@@ -92,133 +92,3 @@ impl SchemaAdapter for NestedStructSchemaAdapter {
9292
))
9393
}
9494
}
95-
96-
/// Validates compatibility between source and target struct fields for casting operations.
97-
///
98-
/// This function implements comprehensive struct compatibility checking by examining:
99-
/// - Field name matching between source and target structs
100-
/// - Type castability for each matching field (including recursive struct validation)
101-
/// - Proper handling of missing fields (target fields not in source are allowed - filled with nulls)
102-
/// - Proper handling of extra fields (source fields not in target are allowed - ignored)
103-
///
104-
/// # Compatibility Rules
105-
/// - **Field Matching**: Fields are matched by name (case-sensitive)
106-
/// - **Missing Target Fields**: Allowed - will be filled with null values during casting
107-
/// - **Extra Source Fields**: Allowed - will be ignored during casting
108-
/// - **Type Compatibility**: Each matching field must be castable using Arrow's type system
109-
/// - **Nested Structs**: Recursively validates nested struct compatibility
110-
///
111-
/// # Arguments
112-
/// * `source_fields` - Fields from the source struct type
113-
/// * `target_fields` - Fields from the target struct type
114-
///
115-
/// # Returns
116-
/// * `Ok(true)` if the structs are compatible for casting
117-
/// * `Err(DataFusionError)` with detailed error message if incompatible
118-
///
119-
/// # Examples
120-
/// ```ignore
121-
/// // Compatible: source has extra field, target has missing field
122-
/// // Source: {a: i32, b: string, c: f64}
123-
/// // Target: {a: i64, d: bool}
124-
/// // Result: Ok(true) - 'a' can cast i32->i64, 'b','c' ignored, 'd' filled with nulls
125-
///
126-
/// // Incompatible: matching field has incompatible types
127-
/// // Source: {a: string}
128-
/// // Target: {a: binary}
129-
/// // Result: Err(...) - string cannot cast to binary
130-
/// ```
131-
fn validate_struct_compatibility(
132-
source_fields: &[FieldRef],
133-
target_fields: &[FieldRef],
134-
) -> Result<bool> {
135-
// Check compatibility for each target field
136-
for target_field in target_fields {
137-
// Look for matching field in source by name
138-
if let Some(source_field) = source_fields
139-
.iter()
140-
.find(|f| f.name() == target_field.name())
141-
{
142-
// Check if the matching field types are compatible
143-
match (source_field.data_type(), target_field.data_type()) {
144-
// Recursively validate nested structs
145-
(Struct(source_nested), Struct(target_nested)) => {
146-
validate_struct_compatibility(source_nested, target_nested)?;
147-
}
148-
// For non-struct types, use the existing castability check
149-
_ => {
150-
if !arrow::compute::can_cast_types(
151-
source_field.data_type(),
152-
target_field.data_type(),
153-
) {
154-
return plan_err!(
155-
"Cannot cast struct field '{}' from type {:?} to type {:?}",
156-
target_field.name(),
157-
source_field.data_type(),
158-
target_field.data_type()
159-
);
160-
}
161-
}
162-
}
163-
}
164-
// Missing fields in source are OK - they'll be filled with nulls
165-
}
166-
167-
// Extra fields in source are OK - they'll be ignored
168-
Ok(true)
169-
}
170-
171-
#[cfg(test)]
172-
mod tests {
173-
use super::*;
174-
use arrow::datatypes::{DataType, Field};
175-
176-
#[test]
177-
fn test_validate_struct_compatibility_incompatible_types() {
178-
// Source struct: {field1: Binary, field2: String}
179-
let source_fields = vec![
180-
Arc::new(Field::new("field1", DataType::Binary, true)),
181-
Arc::new(Field::new("field2", DataType::Utf8, true)),
182-
];
183-
184-
// Target struct: {field1: Int32}
185-
let target_fields = vec![Arc::new(Field::new("field1", DataType::Int32, true))];
186-
187-
let result = validate_struct_compatibility(&source_fields, &target_fields);
188-
assert!(result.is_err());
189-
let error_msg = result.unwrap_err().to_string();
190-
assert!(error_msg.contains("Cannot cast struct field 'field1'"));
191-
assert!(error_msg.contains("Binary"));
192-
assert!(error_msg.contains("Int32"));
193-
}
194-
195-
#[test]
196-
fn test_validate_struct_compatibility_compatible_types() {
197-
// Source struct: {field1: Int32, field2: String}
198-
let source_fields = vec![
199-
Arc::new(Field::new("field1", DataType::Int32, true)),
200-
Arc::new(Field::new("field2", DataType::Utf8, true)),
201-
];
202-
203-
// Target struct: {field1: Int64} (Int32 can cast to Int64)
204-
let target_fields = vec![Arc::new(Field::new("field1", DataType::Int64, true))];
205-
206-
let result = validate_struct_compatibility(&source_fields, &target_fields);
207-
assert!(result.is_ok());
208-
assert!(result.unwrap());
209-
}
210-
211-
#[test]
212-
fn test_validate_struct_compatibility_missing_field_in_source() {
213-
// Source struct: {field2: String} (missing field1)
214-
let source_fields = vec![Arc::new(Field::new("field2", DataType::Utf8, true))];
215-
216-
// Target struct: {field1: Int32}
217-
let target_fields = vec![Arc::new(Field::new("field1", DataType::Int32, true))];
218-
219-
// Should be OK - missing fields will be filled with nulls
220-
let result = validate_struct_compatibility(&source_fields, &target_fields);
221-
assert!(result.is_ok());
222-
assert!(result.unwrap());
223-
}
224-
}

0 commit comments

Comments
 (0)