Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyiceberg/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def is_struct(self) -> bool:
return isinstance(self, StructType)


class PrimitiveType(IcebergRootModel[str], IcebergType, Singleton):
class PrimitiveType(Singleton, IcebergRootModel[str], IcebergType):
"""Base class for all Iceberg Primitive Types."""

root: Any = Field()
Expand Down
13 changes: 13 additions & 0 deletions pyiceberg/utils/singleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,16 @@ def __new__(cls, *args, **kwargs): # type: ignore
if key not in cls._instances:
cls._instances[key] = super().__new__(cls)
return cls._instances[key]

def __deepcopy__(self, memo: Dict[int, Any]) -> Any:
"""
Prevent deep copy operations for singletons.

The IcebergRootModel inherits from Pydantic RootModel,
which has its own implementation of deepcopy. When deepcopy
runs, it calls the RootModel __deepcopy__ method and ignores
that it's a Singleton. To handle this, the order of inheritance
is adjusted and a __deepcopy__ method is implemented for
singletons that simply returns itself.
"""
return self
74 changes: 74 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -865,12 +865,70 @@ def generate_snapshot(
"refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag", "max-ref-age-ms": 10000000}},
}

TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES = {
"format-version": 2,
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
"location": "s3://bucket/test/location",
"last-sequence-number": 34,
"last-updated-ms": 1602638573590,
"last-column-id": 7,
"current-schema-id": 1,
"schemas": [
{
"type": "struct",
"schema-id": 1,
"identifier-field-ids": [1],
"fields": [
{"id": 1, "name": "x", "required": True, "type": "long"},
{"id": 4, "name": "a", "required": True, "type": "decimal(16, 2)"},
{"id": 5, "name": "b", "required": True, "type": "decimal(16, 8)"},
{"id": 6, "name": "c", "required": True, "type": "fixed[16]"},
{"id": 7, "name": "d", "required": True, "type": "fixed[18]"},
],
}
],
"default-spec-id": 0,
"partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}],
"last-partition-id": 1000,
"properties": {"read.split.target.size": "134217728"},
"current-snapshot-id": 3055729675574597004,
"snapshots": [
{
"snapshot-id": 3051729675574597004,
"timestamp-ms": 1515100955770,
"sequence-number": 0,
"summary": {"operation": "append"},
"manifest-list": "s3://a/b/1.avro",
},
{
"snapshot-id": 3055729675574597004,
"parent-snapshot-id": 3051729675574597004,
"timestamp-ms": 1555100955770,
"sequence-number": 1,
"summary": {"operation": "append"},
"manifest-list": "s3://a/b/2.avro",
"schema-id": 1,
},
],
"snapshot-log": [
{"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770},
{"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770},
],
"metadata-log": [{"metadata-file": "s3://bucket/.../v1.json", "timestamp-ms": 1515100}],
"refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag", "max-ref-age-ms": 10000000}},
}


@pytest.fixture
def example_table_metadata_v2() -> Dict[str, Any]:
return EXAMPLE_TABLE_METADATA_V2


@pytest.fixture
def table_metadata_v2_with_fixed_and_decimal_types() -> Dict[str, Any]:
return TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES


@pytest.fixture(scope="session")
def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str:
from pyiceberg.io.pyarrow import PyArrowFileIO
Expand Down Expand Up @@ -2064,6 +2122,22 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) -> Table:
)


@pytest.fixture
def table_v2_with_fixed_and_decimal_types(
table_metadata_v2_with_fixed_and_decimal_types: Dict[str, Any],
) -> Table:
table_metadata = TableMetadataV2(
**table_metadata_v2_with_fixed_and_decimal_types,
)
return Table(
identifier=("database", "table"),
metadata=table_metadata,
metadata_location=f"{table_metadata.location}/uuid.metadata.json",
io=load_file_io(),
catalog=NoopCatalog("NoopCatalog"),
)


@pytest.fixture
def table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots: Dict[str, Any]) -> Table:
table_metadata = TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots)
Expand Down
28 changes: 28 additions & 0 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@
BinaryType,
BooleanType,
DateType,
DecimalType,
DoubleType,
FixedType,
FloatType,
IntegerType,
ListType,
Expand Down Expand Up @@ -845,6 +847,32 @@ def test_update_metadata_with_multiple_updates(table_v1: Table) -> None:
assert new_metadata.properties == {"owner": "test", "test_a": "test_a1"}


def test_update_metadata_schema_immutability(
table_v2_with_fixed_and_decimal_types: TableMetadataV2,
) -> None:
update = SetSnapshotRefUpdate(
ref_name="main",
type="branch",
snapshot_id=3051729675574597004,
max_ref_age_ms=123123123,
max_snapshot_age_ms=12312312312,
min_snapshots_to_keep=1,
)

new_metadata = update_table_metadata(
table_v2_with_fixed_and_decimal_types.metadata,
(update,),
)

assert new_metadata.schemas[0].fields == (
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
NestedField(field_id=4, name="a", field_type=DecimalType(precision=16, scale=2), required=True),
NestedField(field_id=5, name="b", field_type=DecimalType(precision=16, scale=8), required=True),
NestedField(field_id=6, name="c", field_type=FixedType(length=16), required=True),
NestedField(field_id=7, name="d", field_type=FixedType(length=18), required=True),
)


def test_metadata_isolation_from_illegal_updates(table_v1: Table) -> None:
base_metadata = table_v1.metadata
base_metadata_backup = base_metadata.model_copy(deep=True)
Expand Down
11 changes: 11 additions & 0 deletions tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,3 +619,14 @@ def test_types_singleton() -> None:
assert id(BooleanType()) == id(BooleanType())
assert id(FixedType(22)) == id(FixedType(22))
assert id(FixedType(19)) != id(FixedType(25))


def test_deepcopy_of_singleton_fixed_type() -> None:
"""FixedType is a singleton, so deepcopy should return the same instance"""
from copy import deepcopy

list_of_fixed_types = [FixedType(22), FixedType(19)]
copied_list = deepcopy(list_of_fixed_types)

for lhs, rhs in zip(list_of_fixed_types, copied_list):
assert id(lhs) == id(rhs)