-
Notifications
You must be signed in to change notification settings - Fork 401
Description
Apache Iceberg Rust version
None
Describe the bug
When upgrading a table from v2 to v3, metadata serialization fails if the table contains existing snapshots. This occurs because SnapshotV3 requires first-row-id and added-rows, but snapshots created before the upgrade do not have these values.
According to the spec, when a table is upgraded to v3, existing snapshots should remain unmodified with first-row-id unset or null. iceberg-rust violates this by requiring all snapshots to have row lineage fields.
The error message is: "v3 Snapshots must have first-row-id and rows-added fields set."
This effectively makes v2 to v3 upgrades impossible for any table with snapshot history.
To Reproduce
Add the following test to `crates/iceberg/src/spec/table_metadata.rs`:
/// Reproduces the serialization failure that follows a v2 -> v3 format upgrade
/// of a table that already holds a snapshot.
///
/// Steps:
/// 1. Build v2 table metadata with a single-column schema.
/// 2. Add one snapshot (created without a row range) and point the main branch at it.
/// 3. Confirm the v2 metadata serializes to JSON.
/// 4. Upgrade the metadata to v3 and confirm the snapshot is carried over
///    with `row_range()` still `None`.
/// 5. Serialize the v3 metadata and assert that this returns an error.
///
/// The final assertion pins the buggy behavior on purpose; once the bug is
/// fixed it should be flipped to `is_ok()`.
#[test]
fn test_v2_to_v3_upgrade_with_existing_snapshot_serialization_fails() {
    // Create a v2 table metadata
    let schema = Schema::builder()
        .with_fields(vec![
            NestedField::required(1, "id", Type::Primitive(PrimitiveType::Long)).into(),
        ])
        .build()
        .unwrap();
    let v2_metadata = TableMetadataBuilder::new(
        schema,
        PartitionSpec::unpartition_spec().into_unbound(),
        SortOrder::unsorted_order(),
        "s3://bucket/test/location".to_string(),
        FormatVersion::V2,
        HashMap::new(),
    )
    .unwrap()
    .build()
    .unwrap()
    .metadata;

    // Add a v2 snapshot; the timestamp is placed just after the metadata's
    // last update time.
    let snapshot = Snapshot::builder()
        .with_snapshot_id(1)
        .with_timestamp_ms(v2_metadata.last_updated_ms + 1)
        .with_sequence_number(1)
        .with_schema_id(0)
        .with_manifest_list("s3://bucket/test/metadata/snap-1.avro")
        .with_summary(Summary {
            operation: Operation::Append,
            additional_properties: HashMap::from([(
                "added-data-files".to_string(),
                "1".to_string(),
            )]),
        })
        .build();
    let v2_with_snapshot = v2_metadata
        .into_builder(Some("s3://bucket/test/metadata/v00001.json".to_string()))
        .add_snapshot(snapshot)
        .unwrap()
        .set_ref(crate::spec::snapshot::MAIN_BRANCH, SnapshotReference {
            snapshot_id: 1,
            retention: SnapshotRetention::Branch {
                min_snapshots_to_keep: None,
                max_snapshot_age_ms: None,
                max_ref_age_ms: None,
            },
        })
        .unwrap()
        .build()
        .unwrap()
        .metadata;

    // Verify v2 serialization works fine
    let v2_json = serde_json::to_string(&v2_with_snapshot);
    assert!(v2_json.is_ok(), "v2 serialization should work");

    // Upgrade to v3
    let v3_metadata = v2_with_snapshot
        .into_builder(Some("s3://bucket/test/metadata/v00002.json".to_string()))
        .upgrade_format_version(FormatVersion::V3)
        .unwrap()
        .build()
        .unwrap()
        .metadata;
    assert_eq!(v3_metadata.format_version, FormatVersion::V3);
    assert_eq!(v3_metadata.snapshots.len(), 1);

    // Verify the snapshot has no row_range
    let snapshot = v3_metadata.snapshots.values().next().unwrap();
    assert!(
        snapshot.row_range().is_none(),
        "Snapshot should have no row_range after upgrade"
    );

    // Try to serialize v3 metadata; this is the step that demonstrates the bug.
    let v3_json = serde_json::to_string(&v3_metadata);
    assert!(
        v3_json.is_err(),
        "v3 serialization is expected to fail while the bug is present"
    );
}
Expected behavior
Upgraded v3 metadata should serialize successfully, with snapshots created before the upgrade keeping first-row-id unset.
Willingness to contribute
None