Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rust/arrow/src/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ pub fn infer_schema_from_files(
}
}

Schema::try_merge(&schemas)
Schema::try_merge(schemas)
}

// optional bounds of the reader, of the form (min line, max line).
Expand Down
69 changes: 33 additions & 36 deletions rust/arrow/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1836,7 +1836,7 @@ impl Schema {
/// ```
/// use arrow::datatypes::*;
///
/// let merged = Schema::try_merge(&vec![
/// let merged = Schema::try_merge(vec![
Copy link
Contributor Author

@alamb alamb Jan 28, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change in this example shows a pretty good example of how I think the usability (as well as performance) of this API is improved by this PR

/// Schema::new(vec![
/// Field::new("c1", DataType::Int64, false),
/// Field::new("c2", DataType::Utf8, false),
Expand All @@ -1857,44 +1857,40 @@ impl Schema {
/// ]),
/// );
/// ```
pub fn try_merge(schemas: &[Self]) -> Result<Self> {
let mut merged = Self::empty();

for schema in schemas {
for (key, value) in schema.metadata.iter() {
// merge metadata
match merged.metadata.get(key) {
Some(old_val) => {
if old_val != value {
pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self> {
schemas
.into_iter()
.try_fold(Self::empty(), |mut merged, schema| {
let Schema { metadata, fields } = schema;
for (key, value) in metadata.into_iter() {
// merge metadata
if let Some(old_val) = merged.metadata.get(&key) {
if old_val != &value {
return Err(ArrowError::SchemaError(
"Fail to merge schema due to conflicting metadata"
"Fail to merge schema due to conflicting metadata."
.to_string(),
));
}
}
None => {
merged.metadata.insert(key.clone(), value.clone());
}
merged.metadata.insert(key, value);
}
}
// merge fields
for field in &schema.fields {
let mut new_field = true;
for merged_field in &mut merged.fields {
if field.name != merged_field.name {
continue;
// merge fields
for field in fields.into_iter() {
let mut new_field = true;
for merged_field in &mut merged.fields {
if field.name != merged_field.name {
continue;
}
new_field = false;
merged_field.try_merge(&field)?
}
// found a new field, add to field list
if new_field {
merged.fields.push(field);
}
new_field = false;
merged_field.try_merge(field)?
}
// found a new field, add to field list
if new_field {
merged.fields.push(field.clone());
}
}
}

Ok(merged)
Ok(merged)
})
}

/// Returns an immutable reference of the vector of `Field` instances.
Expand Down Expand Up @@ -3044,7 +3040,8 @@ mod tests {
f2.set_metadata(Some(metadata2));

assert!(
Schema::try_merge(&[Schema::new(vec![f1]), Schema::new(vec![f2])]).is_err()
Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])])
.is_err()
);

// 2. None + Some
Expand Down Expand Up @@ -3118,7 +3115,7 @@ mod tests {

#[test]
fn test_schema_merge() -> Result<()> {
let merged = Schema::try_merge(&[
let merged = Schema::try_merge(vec![
Schema::new(vec![
Field::new("first_name", DataType::Utf8, false),
Field::new("last_name", DataType::Utf8, false),
Expand Down Expand Up @@ -3177,7 +3174,7 @@ mod tests {

// support merge union fields
assert_eq!(
Schema::try_merge(&[
Schema::try_merge(vec![
Schema::new(vec![Field::new(
"c1",
DataType::Union(vec![
Expand Down Expand Up @@ -3207,7 +3204,7 @@ mod tests {
);

// incompatible field should throw error
assert!(Schema::try_merge(&[
assert!(Schema::try_merge(vec![
Schema::new(vec![
Field::new("first_name", DataType::Utf8, false),
Field::new("last_name", DataType::Utf8, false),
Expand All @@ -3217,7 +3214,7 @@ mod tests {
.is_err());

// incompatible metadata should throw error
assert!(Schema::try_merge(&[
assert!(Schema::try_merge(vec![
Schema::new_with_metadata(
vec![Field::new("first_name", DataType::Utf8, false)],
[("foo".to_string(), "bar".to_string()),]
Expand Down