Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion crates/catalog/rest/testdata/create_table_response.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
"write.summary.partition-limit": "100",
"write.parquet.compression-codec": "zstd"
},
"current-snapshot-id": -1,
"refs": {},
"snapshots": [],
"snapshot-log": [],
Expand Down
1 change: 0 additions & 1 deletion crates/catalog/rest/testdata/update_table_response.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
"write.summary.partition-limit": "100",
"write.parquet.compression-codec": "zstd"
},
"current-snapshot-id": -1,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should add another test case rather than simply removing this?

"refs": {},
"snapshots": [],
"snapshot-log": [],
Expand Down
6 changes: 2 additions & 4 deletions crates/iceberg/src/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ mod tests {
use crate::spec::{
DataContentType, DataFileBuilder, DataFileFormat, Datum, FormatVersion, Literal, Manifest,
ManifestContentType, ManifestEntry, ManifestListWriter, ManifestMetadata, ManifestStatus,
ManifestWriter, Struct, TableMetadata, EMPTY_SNAPSHOT_ID,
ManifestWriter, Struct, TableMetadata,
};
use crate::table::Table;
use crate::TableIdent;
Expand Down Expand Up @@ -632,9 +632,7 @@ mod tests {
.new_output(current_snapshot.manifest_list())
.unwrap(),
current_snapshot.snapshot_id(),
current_snapshot
.parent_snapshot_id()
.unwrap_or(EMPTY_SNAPSHOT_ID),
current_snapshot.parent_snapshot_id(),
current_snapshot.sequence_number(),
);
manifest_list_write
Expand Down
20 changes: 11 additions & 9 deletions crates/iceberg/src/spec/manifest_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,20 @@ impl ManifestListWriter {
pub fn v2(
output_file: OutputFile,
snapshot_id: i64,
parent_snapshot_id: i64,
parent_snapshot_id: Option<i64>,
sequence_number: i64,
) -> Self {
let metadata = HashMap::from_iter([
let mut metadata = HashMap::from_iter([
("snapshot-id".to_string(), snapshot_id.to_string()),
(
"parent-snapshot-id".to_string(),
parent_snapshot_id.to_string(),
),
("sequence-number".to_string(), sequence_number.to_string()),
("format-version".to_string(), "2".to_string()),
]);
parent_snapshot_id.map(|parent_snapshot_id| {
metadata.insert(
"parent-snapshot-id".to_string(),
parent_snapshot_id.to_string(),
)
});
Self::new(
FormatVersion::V2,
output_file,
Expand Down Expand Up @@ -1204,7 +1206,7 @@ mod test {
let mut writer = ManifestListWriter::v2(
file_io.new_output(full_path.clone()).unwrap(),
1646658105718557341,
1646658105718557341,
Some(1646658105718557341),
1,
);

Expand Down Expand Up @@ -1382,7 +1384,7 @@ mod test {
let io = FileIOBuilder::new_fs_io().build().unwrap();
let output_file = io.new_output(path.to_str().unwrap()).unwrap();

let mut writer = ManifestListWriter::v2(output_file, snapshot_id, 0, seq_num);
let mut writer = ManifestListWriter::v2(output_file, snapshot_id, None, seq_num);
writer
.add_manifests(expected_manifest_list.entries.clone().into_iter())
.unwrap();
Expand Down Expand Up @@ -1436,7 +1438,7 @@ mod test {
let io = FileIOBuilder::new_fs_io().build().unwrap();
let output_file = io.new_output(path.to_str().unwrap()).unwrap();

let mut writer = ManifestListWriter::v2(output_file, 1646658105718557341, 0, 1);
let mut writer = ManifestListWriter::v2(output_file, 1646658105718557341, None, 1);
writer
.add_manifests(expected_manifest_list.entries.clone().into_iter())
.unwrap();
Expand Down
135 changes: 108 additions & 27 deletions crates/iceberg/src/spec/table_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@

use serde::{Deserialize, Serialize};
use serde_repr::{Deserialize_repr, Serialize_repr};
use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::{cmp::Ordering, env};
use std::{collections::HashMap, sync::Arc};
use uuid::Uuid;

Expand All @@ -37,6 +37,8 @@ use crate::error::Result;
use crate::{Error, ErrorKind, TableCreation};
use chrono::{DateTime, TimeZone, Utc};

pub(crate) const LEGACY_CURRENT_SNAPSHOT_ID: &str = "legacy-current-snapshot-id";

static MAIN_BRANCH: &str = "main";
static DEFAULT_SPEC_ID: i32 = 0;
static DEFAULT_SORT_ORDER_ID: i64 = 0;
Expand Down Expand Up @@ -372,6 +374,41 @@ impl TableMetadataBuilder {
}
}

/// Config values for serializing and deserializing TableMetadata
pub struct TableMetadataConfig {
use_legacy_snapshot_id: bool,
}

impl Default for TableMetadataConfig {
fn default() -> Self {
Self::new()
}
}

impl TableMetadataConfig {
/// Populate config values for TableMetadata
pub fn new() -> Self {
Self {
// todo: populate settings from a config file and from the environment
use_legacy_snapshot_id: matches!(env::var(LEGACY_CURRENT_SNAPSHOT_ID), Ok(val) if val == "true"),
}
}

/// Skip serializing current snapshot if snapshot is none
/// and `legacy-current-snapshot-id` is not enabled.
pub fn skip_current_snapshot(current_snapshot_id: &Option<i64>) -> bool {
if TableMetadataConfig::new().use_legacy_snapshot_id() {
return false;
}
current_snapshot_id.is_none()
}

/// Check if use of legacy snapshot id is allowed or not
pub fn use_legacy_snapshot_id(&self) -> bool {
self.use_legacy_snapshot_id
}
}

pub(super) mod _serde {
/// This is a helper module that defines types to help with serialization/deserialization.
/// For deserialization the input first gets read into either the [TableMetadataV1] or [TableMetadataV2] struct
Expand All @@ -389,6 +426,7 @@ pub(super) mod _serde {
schema::_serde::{SchemaV1, SchemaV2},
snapshot::_serde::{SnapshotV1, SnapshotV2},
PartitionField, PartitionSpec, Schema, SnapshotReference, SnapshotRetention, SortOrder,
TableMetadataConfig,
},
Error, ErrorKind,
};
Expand Down Expand Up @@ -460,7 +498,7 @@ pub(super) mod _serde {
pub last_partition_id: Option<i32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub properties: Option<HashMap<String, String>>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(skip_serializing_if = "TableMetadataConfig::skip_current_snapshot")]
pub current_snapshot_id: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub snapshots: Option<Vec<SnapshotV1>>,
Expand Down Expand Up @@ -535,11 +573,7 @@ pub(super) mod _serde {
impl TryFrom<TableMetadataV2> for TableMetadata {
type Error = Error;
fn try_from(value: TableMetadataV2) -> Result<Self, self::Error> {
let current_snapshot_id = if let &Some(-1) = &value.current_snapshot_id {
None
} else {
value.current_snapshot_id
};
let current_snapshot_id = value.current_snapshot_id;
let schemas = HashMap::from_iter(
value
.schemas
Expand Down Expand Up @@ -673,16 +707,10 @@ pub(super) mod _serde {
.unwrap_or_else(|| partition_specs.keys().copied().max().unwrap_or_default()),
partition_specs,
schemas,

properties: value.properties.unwrap_or_default(),
current_snapshot_id: if let &Some(id) = &value.current_snapshot_id {
if id == EMPTY_SNAPSHOT_ID {
None
} else {
Some(id)
}
} else {
value.current_snapshot_id
current_snapshot_id: match &value.current_snapshot_id {
Some(id) if id == &EMPTY_SNAPSHOT_ID => None,
_ => value.current_snapshot_id,
},
snapshots: value
.snapshots
Expand Down Expand Up @@ -747,7 +775,7 @@ pub(super) mod _serde {
default_spec_id: v.default_spec_id,
last_partition_id: v.last_partition_id,
properties: Some(v.properties),
current_snapshot_id: v.current_snapshot_id.or(Some(-1)),
current_snapshot_id: v.current_snapshot_id,
snapshots: if v.snapshots.is_empty() {
Some(vec![])
} else {
Expand Down Expand Up @@ -831,7 +859,13 @@ pub(super) mod _serde {
} else {
Some(v.properties)
},
current_snapshot_id: v.current_snapshot_id.or(Some(-1)),
current_snapshot_id: if v.current_snapshot_id.is_none()
&& TableMetadataConfig::new().use_legacy_snapshot_id()
{
Some(EMPTY_SNAPSHOT_ID)
} else {
v.current_snapshot_id
},
snapshots: if v.snapshots.is_empty() {
None
} else {
Expand Down Expand Up @@ -924,7 +958,7 @@ impl SnapshotLog {

#[cfg(test)]
mod tests {
use std::{collections::HashMap, fs, sync::Arc};
use std::{collections::HashMap, env, fs, sync::Arc};

use anyhow::Result;
use uuid::Uuid;
Expand All @@ -933,9 +967,10 @@ mod tests {

use crate::{
spec::{
table_metadata::TableMetadata, NestedField, NullOrder, Operation, PartitionField,
PartitionSpec, PrimitiveType, Schema, Snapshot, SnapshotReference, SnapshotRetention,
SortDirection, SortField, SortOrder, Summary, Transform, Type,
table_metadata::TableMetadata, table_metadata::LEGACY_CURRENT_SNAPSHOT_ID, NestedField,
NullOrder, Operation, PartitionField, PartitionSpec, PrimitiveType, Schema, Snapshot,
SnapshotReference, SnapshotRetention, SortDirection, SortField, SortOrder, Summary,
Transform, Type,
},
TableCreation,
};
Expand Down Expand Up @@ -1473,10 +1508,11 @@ mod tests {
check_table_metadata_serde(&metadata, expected);
}

#[test]
fn test_table_metadata_v1_file_valid() {
let metadata =
fs::read_to_string("testdata/table_metadata/TableMetadataV1Valid.json").unwrap();
fn check_v1_table_with_valid_metadata_file(
file_location: &str,
current_snapshot_id: Option<i64>,
) {
let metadata = fs::read_to_string(file_location).unwrap();

let schema = Schema::builder()
.with_schema_id(0)
Expand Down Expand Up @@ -1532,7 +1568,7 @@ mod tests {
refs: HashMap::from_iter(vec![(
"main".to_string(),
SnapshotReference {
snapshot_id: -1,
snapshot_id: current_snapshot_id.unwrap_or_default(),
retention: SnapshotRetention::Branch {
min_snapshots_to_keep: None,
max_snapshot_age_ms: None,
Expand All @@ -1545,6 +1581,51 @@ mod tests {
check_table_metadata_serde(&metadata, expected);
}

fn check_v1_table_err_for_legacy_snapshot() {
let metadata =
fs::read_to_string("testdata/table_metadata/TableMetadataV1LegacySnapshotId.json")
.unwrap();

let desered: Result<TableMetadata, serde_json::Error> = serde_json::from_str(&metadata);

assert_eq!(desered.unwrap().current_snapshot_id, None)
}

#[test]
fn test_table_metadata_v1_file() {
// test metadata file v1 with current_snapshot_id optional
let metadata =
fs::read_to_string("testdata/table_metadata/TableMetadataV1Valid.json").unwrap();

let desered: Result<TableMetadata, serde_json::Error> = serde_json::from_str(&metadata);

assert_eq!(desered.unwrap().current_snapshot_id, None)
}

#[test]
fn test_table_metadata_v1_file_for_legacy_snapshot() {
// test metadata file v1 with `legacy-current-snapshot-id` flag on and current_snapshot_id not optional
env::set_var(LEGACY_CURRENT_SNAPSHOT_ID, "true");
assert_eq!(env::var(LEGACY_CURRENT_SNAPSHOT_ID), Ok("true".to_string()));
check_v1_table_with_valid_metadata_file(
"testdata/table_metadata/TableMetadataV1LegacySnapshotId.json",
Some(-1),
);

// test metadata file v1 with `legacy-current-snapshot-id` flag off and current_snapshot_id not optional
env::set_var(LEGACY_CURRENT_SNAPSHOT_ID, "false");
assert_eq!(
env::var(LEGACY_CURRENT_SNAPSHOT_ID),
Ok("false".to_string())
);
check_v1_table_err_for_legacy_snapshot();

// test metadata file v1 with `legacy-current-snapshot-id` flag unset and current_snapshot_id not optional
env::remove_var(LEGACY_CURRENT_SNAPSHOT_ID);
assert!(env::var(LEGACY_CURRENT_SNAPSHOT_ID).is_err());
check_v1_table_err_for_legacy_snapshot();
}

#[test]
fn test_table_metadata_v2_schema_not_found() {
let metadata =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,5 @@
},
"partition-spec": [],
"properties": {},
"current-snapshot-id": -1,
"snapshots": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"format-version": 1,
"table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
"location": "s3://bucket/test/location",
"last-updated-ms": 1602638573874,
"last-column-id": 3,
"schema": {
"type": "struct",
"fields": [
{
"id": 1,
"name": "x",
"required": true,
"type": "long"
},
{
"id": 2,
"name": "y",
"required": true,
"type": "long",
"doc": "comment"
},
{
"id": 3,
"name": "z",
"required": true,
"type": "long"
}
]
},
"partition-spec": [
{
"name": "x",
"transform": "identity",
"source-id": 1,
"field-id": 1000
}
],
"properties": {},
"current-snapshot-id": -1,
"snapshots": []
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,5 @@
}
],
"properties": {},
"current-snapshot-id": -1,
"snapshots": []
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@
}
],
"properties": {},
"current-snapshot-id": -1,
"snapshots": [],
"snapshot-log": [],
"metadata-log": []
Expand Down
Loading