
Commit 78615d2

Remove 0.11.0 deprecated methods (#2983)
# Rationale for this change

We have a couple of features that were deprecated in 0.10.0 and scheduled for removal in 0.11.0. This change removes them.

## Are these changes tested?

Tests should pass.

## Are there any user-facing changes?

---------

Co-authored-by: Kevin Liu <kevin.jq.liu@gmail.com>
1 parent 062a252 commit 78615d2
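
All of the removed code paths follow the same deprecation pattern from `pyiceberg.utils.deprecated`, visible in the deleted hunks below. As a minimal sketch of that pattern (arguments copied from the deleted calls):

```python
from pyiceberg.utils.deprecated import deprecation_message

# Pattern removed by this commit: warn that a deprecated property/argument is
# being exercised, then still honor the old behaviour until the removal release.
deprecation_message(
    deprecated_in="0.10.0",
    removed_in="0.11.0",
    help_message="Property `pyarrow.use-large-types-on-read` will be removed.",
)
```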

4 files changed: 1 addition & 113 deletions

pyiceberg/io/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -99,7 +99,6 @@
 GCS_VERSION_AWARE = "gcs.version-aware"
 HF_ENDPOINT = "hf.endpoint"
 HF_TOKEN = "hf.token"
-PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"
 
 
 @runtime_checkable

pyiceberg/io/pyarrow.py

Lines changed: 0 additions & 22 deletions
@@ -100,7 +100,6 @@
     HDFS_KERB_TICKET,
     HDFS_PORT,
     HDFS_USER,
-    PYARROW_USE_LARGE_TYPES_ON_READ,
     S3_ACCESS_KEY_ID,
     S3_ANONYMOUS,
     S3_CONNECT_TIMEOUT,
@@ -179,7 +178,6 @@
 from pyiceberg.utils.config import Config
 from pyiceberg.utils.datetime import millis_to_datetime
 from pyiceberg.utils.decimal import unscaled_to_decimal
-from pyiceberg.utils.deprecated import deprecation_message
 from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
 from pyiceberg.utils.singleton import Singleton
 from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
@@ -1756,14 +1754,6 @@ def to_table(self, tasks: Iterable[FileScanTask]) -> pa.Table:
             (pa.Table.from_batches([batch]) for batch in itertools.chain([first_batch], batches)), promote_options="permissive"
         )
 
-        if property_as_bool(self._io.properties, PYARROW_USE_LARGE_TYPES_ON_READ, False):
-            deprecation_message(
-                deprecated_in="0.10.0",
-                removed_in="0.11.0",
-                help_message=f"Property `{PYARROW_USE_LARGE_TYPES_ON_READ}` will be removed.",
-            )
-            result = result.cast(arrow_schema)
-
         return result
 
     def to_record_batches(self, tasks: Iterable[FileScanTask]) -> Iterator[pa.RecordBatch]:
@@ -1872,7 +1862,6 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, pa.Array | None]
     _file_schema: Schema
     _include_field_ids: bool
     _downcast_ns_timestamp_to_us: bool
-    _use_large_types: bool | None
     _projected_missing_fields: dict[int, Any]
     _allow_timestamp_tz_mismatch: bool
 
@@ -1881,26 +1870,17 @@ def __init__(
         file_schema: Schema,
         downcast_ns_timestamp_to_us: bool = False,
         include_field_ids: bool = False,
-        use_large_types: bool | None = None,
         projected_missing_fields: dict[int, Any] = EMPTY_DICT,
         allow_timestamp_tz_mismatch: bool = False,
     ) -> None:
         self._file_schema = file_schema
         self._include_field_ids = include_field_ids
         self._downcast_ns_timestamp_to_us = downcast_ns_timestamp_to_us
-        self._use_large_types = use_large_types
         self._projected_missing_fields = projected_missing_fields
         # When True, allows projecting timestamptz (UTC) to timestamp (no tz).
         # Allowed for reading (aligns with Spark); disallowed for writing to enforce Iceberg spec's strict typing.
         self._allow_timestamp_tz_mismatch = allow_timestamp_tz_mismatch
 
-        if use_large_types is not None:
-            deprecation_message(
-                deprecated_in="0.10.0",
-                removed_in="0.11.0",
-                help_message="Argument `use_large_types` will be removed from ArrowProjectionVisitor",
-            )
-
     def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
         file_field = self._file_schema.find_field(field.field_id)
 
@@ -1949,8 +1929,6 @@ def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
             target_schema = schema_to_pyarrow(
                 promote(file_field.field_type, field.field_type), include_field_ids=self._include_field_ids
             )
-            if self._use_large_types is False:
-                target_schema = _pyarrow_schema_ensure_small_types(target_schema)
             return values.cast(target_schema)
 
         return values
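
With the `pyarrow.use-large-types-on-read` override gone, there is no built-in switch for forcing small Arrow types on read. A rough sketch of doing the downcast on the caller's side with plain PyArrow (top-level string/binary columns only; the catalog name "default" and table "db.tbl" are assumptions, not taken from this diff):

```python
import pyarrow as pa
from pyiceberg.catalog import load_catalog

# Assumption: a catalog named "default" and a table "db.tbl" already exist.
tbl = load_catalog("default").load_table("db.tbl")
result = tbl.scan().to_arrow()  # may come back with large Arrow types

def _small(t: pa.DataType) -> pa.DataType:
    # Map large string/binary types to their small counterparts; leave others alone.
    if pa.types.is_large_string(t):
        return pa.string()
    if pa.types.is_large_binary(t):
        return pa.binary()
    return t

small_schema = pa.schema(
    [pa.field(f.name, _small(f.type), nullable=f.nullable) for f in result.schema]
)
result = result.cast(small_schema)
```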

pyiceberg/table/snapshots.py

Lines changed: 1 addition & 46 deletions
@@ -29,7 +29,6 @@
 from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, _manifests
 from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
 from pyiceberg.schema import Schema
-from pyiceberg.utils.deprecated import deprecation_message
 
 if TYPE_CHECKING:
     from pyiceberg.table.metadata import TableMetadata
@@ -344,54 +343,10 @@ def _partition_summary(self, update_metrics: UpdateMetrics) -> str:
         return ",".join([f"{prop}={val}" for prop, val in update_metrics.to_dict().items()])
 
 
-def _truncate_table_summary(summary: Summary, previous_summary: Mapping[str, str]) -> Summary:
-    for prop in {
-        TOTAL_DATA_FILES,
-        TOTAL_DELETE_FILES,
-        TOTAL_RECORDS,
-        TOTAL_FILE_SIZE,
-        TOTAL_POSITION_DELETES,
-        TOTAL_EQUALITY_DELETES,
-    }:
-        summary[prop] = "0"
-
-    def get_prop(prop: str) -> int:
-        value = previous_summary.get(prop) or "0"
-        try:
-            return int(value)
-        except ValueError as e:
-            raise ValueError(f"Could not parse summary property {prop} to an int: {value}") from e
-
-    if value := get_prop(TOTAL_DATA_FILES):
-        summary[DELETED_DATA_FILES] = str(value)
-    if value := get_prop(TOTAL_DELETE_FILES):
-        summary[REMOVED_DELETE_FILES] = str(value)
-    if value := get_prop(TOTAL_RECORDS):
-        summary[DELETED_RECORDS] = str(value)
-    if value := get_prop(TOTAL_FILE_SIZE):
-        summary[REMOVED_FILE_SIZE] = str(value)
-    if value := get_prop(TOTAL_POSITION_DELETES):
-        summary[REMOVED_POSITION_DELETES] = str(value)
-    if value := get_prop(TOTAL_EQUALITY_DELETES):
-        summary[REMOVED_EQUALITY_DELETES] = str(value)
-
-    return summary
-
-
-def update_snapshot_summaries(
-    summary: Summary, previous_summary: Mapping[str, str] | None = None, truncate_full_table: bool = False
-) -> Summary:
+def update_snapshot_summaries(summary: Summary, previous_summary: Mapping[str, str] | None = None) -> Summary:
     if summary.operation not in {Operation.APPEND, Operation.OVERWRITE, Operation.DELETE}:
         raise ValueError(f"Operation not implemented: {summary.operation}")
 
-    if truncate_full_table and summary.operation == Operation.OVERWRITE and previous_summary is not None:
-        deprecation_message(
-            deprecated_in="0.10.0",
-            removed_in="0.11.0",
-            help_message="The truncate-full-table shouldn't be used.",
-        )
-        summary = _truncate_table_summary(summary, previous_summary)
-
     if not previous_summary:
         previous_summary = {
             TOTAL_DATA_FILES: "0",
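
After this change `update_snapshot_summaries` only accepts the summary and an optional previous summary; the `truncate_full_table` flag and the `_truncate_table_summary` helper are gone. A rough sketch of calling the surviving signature (the keyword-style `Summary(...)` construction and the summary keys shown here are assumptions, not taken from this diff):

```python
from pyiceberg.table.snapshots import Operation, Summary, update_snapshot_summaries

# Sketch only: merge an APPEND summary with the previous snapshot's totals.
new_summary = update_snapshot_summaries(
    summary=Summary(operation=Operation.APPEND, **{"added-data-files": "1", "added-records": "100"}),
    previous_summary={"total-data-files": "0", "total-records": "0"},
)
print(new_summary)
```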

tests/integration/test_reads.py

Lines changed: 0 additions & 44 deletions
@@ -45,7 +45,6 @@
     NotNaN,
     NotNull,
 )
-from pyiceberg.io import PYARROW_USE_LARGE_TYPES_ON_READ
 from pyiceberg.io.pyarrow import (
     pyarrow_to_schema,
 )
@@ -1125,49 +1124,6 @@ def test_table_scan_keep_types(catalog: Catalog) -> None:
     assert result_table.schema.equals(expected_schema)
 
 
-@pytest.mark.integration
-@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
-def test_table_scan_override_with_small_types(catalog: Catalog) -> None:
-    identifier = "default.test_table_scan_override_with_small_types"
-    arrow_table = pa.Table.from_arrays(
-        [
-            pa.array(["a", "b", "c"]),
-            pa.array(["a", "b", "c"]),
-            pa.array([b"a", b"b", b"c"]),
-            pa.array([["a", "b"], ["c", "d"], ["e", "f"]]),
-        ],
-        names=["string", "string-to-binary", "binary", "list"],
-    )
-
-    try:
-        catalog.drop_table(identifier)
-    except NoSuchTableError:
-        pass
-
-    tbl = catalog.create_table(
-        identifier,
-        schema=arrow_table.schema,
-    )
-
-    tbl.append(arrow_table)
-
-    with tbl.update_schema() as update_schema:
-        update_schema.update_column("string-to-binary", BinaryType())
-
-    tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "False"
-    result_table = tbl.scan().to_arrow()
-
-    expected_schema = pa.schema(
-        [
-            pa.field("string", pa.string()),
-            pa.field("string-to-binary", pa.large_binary()),
-            pa.field("binary", pa.binary()),
-            pa.field("list", pa.list_(pa.string())),
-        ]
-    )
-    assert result_table.schema.equals(expected_schema)
-
-
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
 def test_empty_scan_ordered_str(catalog: Catalog) -> None:
