
Commit 78615d2

Remove 0.11.0 deprecated methods (#2983)
# Rationale for this change

We have a couple of features that were deprecated in 0.10.0 and scheduled for removal in 0.11.0. This change removes them.

## Are these changes tested?

Tests should pass.

## Are there any user-facing changes?

---------

Co-authored-by: Kevin Liu <kevin.jq.liu@gmail.com>
1 parent 062a252 commit 78615d2
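
All of the removed code paths follow the same deprecation pattern from `pyiceberg.utils.deprecated`, visible in the deleted hunks below. As a minimal sketch of that pattern (arguments copied from the deleted calls):

```python
from pyiceberg.utils.deprecated import deprecation_message

# Pattern removed by this commit: warn that a deprecated property/argument is
# being exercised, then still honor the old behaviour until the removal release.
deprecation_message(
    deprecated_in="0.10.0",
    removed_in="0.11.0",
    help_message="Property `pyarrow.use-large-types-on-read` will be removed.",
)
```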

4 files changed: 1 addition & 113 deletions

pyiceberg/io/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -99,7 +99,6 @@
 GCS_VERSION_AWARE = "gcs.version-aware"
 HF_ENDPOINT = "hf.endpoint"
 HF_TOKEN = "hf.token"
-PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"
 
 
 @runtime_checkable

pyiceberg/io/pyarrow.py

Lines changed: 0 additions & 22 deletions
@@ -100,7 +100,6 @@
     HDFS_KERB_TICKET,
     HDFS_PORT,
     HDFS_USER,
-    PYARROW_USE_LARGE_TYPES_ON_READ,
     S3_ACCESS_KEY_ID,
     S3_ANONYMOUS,
     S3_CONNECT_TIMEOUT,
@@ -179,7 +178,6 @@
 from pyiceberg.utils.config import Config
 from pyiceberg.utils.datetime import millis_to_datetime
 from pyiceberg.utils.decimal import unscaled_to_decimal
-from pyiceberg.utils.deprecated import deprecation_message
 from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
 from pyiceberg.utils.singleton import Singleton
 from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
@@ -1756,14 +1754,6 @@ def to_table(self, tasks: Iterable[FileScanTask]) -> pa.Table:
             (pa.Table.from_batches([batch]) for batch in itertools.chain([first_batch], batches)), promote_options="permissive"
         )
 
-        if property_as_bool(self._io.properties, PYARROW_USE_LARGE_TYPES_ON_READ, False):
-            deprecation_message(
-                deprecated_in="0.10.0",
-                removed_in="0.11.0",
-                help_message=f"Property `{PYARROW_USE_LARGE_TYPES_ON_READ}` will be removed.",
-            )
-            result = result.cast(arrow_schema)
-
         return result
 
     def to_record_batches(self, tasks: Iterable[FileScanTask]) -> Iterator[pa.RecordBatch]:
@@ -1872,7 +1862,6 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, pa.Array | None]
     _file_schema: Schema
     _include_field_ids: bool
     _downcast_ns_timestamp_to_us: bool
-    _use_large_types: bool | None
     _projected_missing_fields: dict[int, Any]
     _allow_timestamp_tz_mismatch: bool
 
@@ -1881,26 +1870,17 @@ def __init__(
         file_schema: Schema,
         downcast_ns_timestamp_to_us: bool = False,
         include_field_ids: bool = False,
-        use_large_types: bool | None = None,
         projected_missing_fields: dict[int, Any] = EMPTY_DICT,
         allow_timestamp_tz_mismatch: bool = False,
     ) -> None:
         self._file_schema = file_schema
         self._include_field_ids = include_field_ids
         self._downcast_ns_timestamp_to_us = downcast_ns_timestamp_to_us
-        self._use_large_types = use_large_types
         self._projected_missing_fields = projected_missing_fields
         # When True, allows projecting timestamptz (UTC) to timestamp (no tz).
         # Allowed for reading (aligns with Spark); disallowed for writing to enforce Iceberg spec's strict typing.
         self._allow_timestamp_tz_mismatch = allow_timestamp_tz_mismatch
 
-        if use_large_types is not None:
-            deprecation_message(
-                deprecated_in="0.10.0",
-                removed_in="0.11.0",
-                help_message="Argument `use_large_types` will be removed from ArrowProjectionVisitor",
-            )
-
     def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
         file_field = self._file_schema.find_field(field.field_id)
 
@@ -1949,8 +1929,6 @@ def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
             target_schema = schema_to_pyarrow(
                 promote(file_field.field_type, field.field_type), include_field_ids=self._include_field_ids
             )
-            if self._use_large_types is False:
-                target_schema = _pyarrow_schema_ensure_small_types(target_schema)
             return values.cast(target_schema)
 
         return values
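
With the `pyarrow.use-large-types-on-read` override gone, there is no built-in switch for forcing small Arrow types on read. A rough sketch of doing the downcast on the caller's side with plain PyArrow (top-level string/binary columns only; the catalog name "default" and table "db.tbl" are assumptions, not taken from this diff):

```python
import pyarrow as pa
from pyiceberg.catalog import load_catalog

# Assumption: a catalog named "default" and a table "db.tbl" already exist.
tbl = load_catalog("default").load_table("db.tbl")
result = tbl.scan().to_arrow()  # may come back with large Arrow types

def _small(t: pa.DataType) -> pa.DataType:
    # Map large string/binary types to their small counterparts; leave others alone.
    if pa.types.is_large_string(t):
        return pa.string()
    if pa.types.is_large_binary(t):
        return pa.binary()
    return t

small_schema = pa.schema(
    [pa.field(f.name, _small(f.type), nullable=f.nullable) for f in result.schema]
)
result = result.cast(small_schema)
```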

pyiceberg/table/snapshots.py

Lines changed: 1 addition & 46 deletions
@@ -29,7 +29,6 @@
 from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, _manifests
 from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
 from pyiceberg.schema import Schema
-from pyiceberg.utils.deprecated import deprecation_message
 
 if TYPE_CHECKING:
     from pyiceberg.table.metadata import TableMetadata
@@ -344,54 +343,10 @@ def _partition_summary(self, update_metrics: UpdateMetrics) -> str:
         return ",".join([f"{prop}={val}" for prop, val in update_metrics.to_dict().items()])
 
 
-def _truncate_table_summary(summary: Summary, previous_summary: Mapping[str, str]) -> Summary:
-    for prop in {
-        TOTAL_DATA_FILES,
-        TOTAL_DELETE_FILES,
-        TOTAL_RECORDS,
-        TOTAL_FILE_SIZE,
-        TOTAL_POSITION_DELETES,
-        TOTAL_EQUALITY_DELETES,
-    }:
-        summary[prop] = "0"
-
-    def get_prop(prop: str) -> int:
-        value = previous_summary.get(prop) or "0"
-        try:
-            return int(value)
-        except ValueError as e:
-            raise ValueError(f"Could not parse summary property {prop} to an int: {value}") from e
-
-    if value := get_prop(TOTAL_DATA_FILES):
-        summary[DELETED_DATA_FILES] = str(value)
-    if value := get_prop(TOTAL_DELETE_FILES):
-        summary[REMOVED_DELETE_FILES] = str(value)
-    if value := get_prop(TOTAL_RECORDS):
-        summary[DELETED_RECORDS] = str(value)
-    if value := get_prop(TOTAL_FILE_SIZE):
-        summary[REMOVED_FILE_SIZE] = str(value)
-    if value := get_prop(TOTAL_POSITION_DELETES):
-        summary[REMOVED_POSITION_DELETES] = str(value)
-    if value := get_prop(TOTAL_EQUALITY_DELETES):
-        summary[REMOVED_EQUALITY_DELETES] = str(value)
-
-    return summary
-
-
-def update_snapshot_summaries(
-    summary: Summary, previous_summary: Mapping[str, str] | None = None, truncate_full_table: bool = False
-) -> Summary:
+def update_snapshot_summaries(summary: Summary, previous_summary: Mapping[str, str] | None = None) -> Summary:
     if summary.operation not in {Operation.APPEND, Operation.OVERWRITE, Operation.DELETE}:
         raise ValueError(f"Operation not implemented: {summary.operation}")
 
-    if truncate_full_table and summary.operation == Operation.OVERWRITE and previous_summary is not None:
-        deprecation_message(
-            deprecated_in="0.10.0",
-            removed_in="0.11.0",
-            help_message="The truncate-full-table shouldn't be used.",
-        )
-        summary = _truncate_table_summary(summary, previous_summary)
-
     if not previous_summary:
         previous_summary = {
             TOTAL_DATA_FILES: "0",
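
After this change `update_snapshot_summaries` only accepts the summary and an optional previous summary; the `truncate_full_table` flag and the `_truncate_table_summary` helper are gone. A rough sketch of calling the surviving signature (the keyword-style `Summary(...)` construction and the summary keys shown here are assumptions, not taken from this diff):

```python
from pyiceberg.table.snapshots import Operation, Summary, update_snapshot_summaries

# Sketch only: merge an APPEND summary with the previous snapshot's totals.
new_summary = update_snapshot_summaries(
    summary=Summary(operation=Operation.APPEND, **{"added-data-files": "1", "added-records": "100"}),
    previous_summary={"total-data-files": "0", "total-records": "0"},
)
print(new_summary)
```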

tests/integration/test_reads.py

Lines changed: 0 additions & 44 deletions
@@ -45,7 +45,6 @@
     NotNaN,
     NotNull,
 )
-from pyiceberg.io import PYARROW_USE_LARGE_TYPES_ON_READ
 from pyiceberg.io.pyarrow import (
     pyarrow_to_schema,
 )
@@ -1125,49 +1124,6 @@ def test_table_scan_keep_types(catalog: Catalog) -> None:
     assert result_table.schema.equals(expected_schema)
 
 
-@pytest.mark.integration
-@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
-def test_table_scan_override_with_small_types(catalog: Catalog) -> None:
-    identifier = "default.test_table_scan_override_with_small_types"
-    arrow_table = pa.Table.from_arrays(
-        [
-            pa.array(["a", "b", "c"]),
-            pa.array(["a", "b", "c"]),
-            pa.array([b"a", b"b", b"c"]),
-            pa.array([["a", "b"], ["c", "d"], ["e", "f"]]),
-        ],
-        names=["string", "string-to-binary", "binary", "list"],
-    )
-
-    try:
-        catalog.drop_table(identifier)
-    except NoSuchTableError:
-        pass
-
-    tbl = catalog.create_table(
-        identifier,
-        schema=arrow_table.schema,
-    )
-
-    tbl.append(arrow_table)
-
-    with tbl.update_schema() as update_schema:
-        update_schema.update_column("string-to-binary", BinaryType())
-
-    tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "False"
-    result_table = tbl.scan().to_arrow()
-
-    expected_schema = pa.schema(
-        [
-            pa.field("string", pa.string()),
-            pa.field("string-to-binary", pa.large_binary()),
-            pa.field("binary", pa.binary()),
-            pa.field("list", pa.list_(pa.string())),
-        ]
-    )
-    assert result_table.schema.equals(expected_schema)
-
-
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
 def test_empty_scan_ordered_str(catalog: Catalog) -> None:
