Skip to content

Commit 85de0c6

Browse files
committed
fix spec_id passing
1 parent 4184461 commit 85de0c6

File tree

3 files changed

+17
-13
lines changed

3 files changed

+17
-13
lines changed

pyiceberg/catalog/rest/__init__.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
from collections import deque
18-
from collections.abc import Iterator
1918
from enum import Enum
2019
from typing import (
2120
TYPE_CHECKING,
@@ -460,18 +459,17 @@ def _fetch_scan_tasks(self, identifier: str | Identifier, plan_task: str) -> Sca
460459

461460
return ScanTasks.model_validate_json(response.text)
462461

463-
def plan_scan(self, identifier: str | Identifier, request: PlanTableScanRequest) -> Iterator["FileScanTask"]:
464-
"""Plan a table scan and yield FileScanTasks.
462+
def plan_scan(self, identifier: str | Identifier, request: PlanTableScanRequest) -> list["FileScanTask"]:
463+
"""Plan a table scan and return FileScanTasks.
465464
466465
Handles the full scan planning lifecycle including pagination.
467-
Each response batch is self-contained, so tasks are yielded as received.
468466
469467
Args:
470468
identifier: Table identifier.
471469
request: The scan plan request parameters.
472470
473-
Yields:
474-
FileScanTask objects ready for execution.
471+
Returns:
472+
List of FileScanTask objects ready for execution.
475473
476474
Raises:
477475
RuntimeError: If planning fails, is cancelled, or returns unexpected response.
@@ -492,19 +490,23 @@ def plan_scan(self, identifier: str | Identifier, request: PlanTableScanRequest)
492490
if not isinstance(response, PlanCompleted):
493491
raise RuntimeError(f"Invalid planStatus for response: {type(response).__name__}")
494492

495-
# Yield tasks from initial response
493+
tasks: list[FileScanTask] = []
494+
495+
# Collect tasks from initial response
496496
for task in response.file_scan_tasks:
497-
yield FileScanTask.from_rest_response(task, response.delete_files)
497+
tasks.append(FileScanTask.from_rest_response(task, response.delete_files))
498498

499-
# Fetch and yield from additional batches
499+
# Fetch and collect from additional batches
500500
pending_tasks = deque(response.plan_tasks)
501501
while pending_tasks:
502502
plan_task = pending_tasks.popleft()
503503
batch = self._fetch_scan_tasks(identifier, plan_task)
504504
for task in batch.file_scan_tasks:
505-
yield FileScanTask.from_rest_response(task, batch.delete_files)
505+
tasks.append(FileScanTask.from_rest_response(task, batch.delete_files))
506506
pending_tasks.extend(batch.plan_tasks)
507507

508+
return tasks
509+
508510
def _create_legacy_oauth2_auth_manager(self, session: Session) -> AuthManager:
509511
"""Create the LegacyOAuth2AuthManager by fetching required properties.
510512

pyiceberg/catalog/rest/scan_planning.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ class PlanTableScanRequest(IcebergBaseModel):
198198
start_snapshot_id: int | None = Field(alias="start-snapshot-id", default=None)
199199
end_snapshot_id: int | None = Field(alias="end-snapshot-id", default=None)
200200
stats_fields: list[str] | None = Field(alias="stats-fields", default=None)
201+
min_rows_requested: int | None = Field(alias="min-rows-requested", default=None)
201202

202203
@model_validator(mode="after")
203204
def _validate_snapshot_fields(self) -> PlanTableScanRequest:

pyiceberg/table/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1847,7 +1847,7 @@ def from_rest_response(
18471847
)
18481848

18491849

1850-
def _rest_file_to_data_file(rest_file: RESTContentFile, *, include_stats: bool) -> DataFile:
1850+
def _rest_file_to_data_file(rest_file: RESTContentFile, include_stats: bool) -> DataFile:
18511851
"""Convert a REST content file to a manifest DataFile."""
18521852
from pyiceberg.catalog.rest.scan_planning import CONTENT_TYPE_MAP
18531853

@@ -1856,7 +1856,7 @@ def _rest_file_to_data_file(rest_file: RESTContentFile, *, include_stats: bool)
18561856
null_value_counts = getattr(rest_file, "null_value_counts", None)
18571857
nan_value_counts = getattr(rest_file, "nan_value_counts", None)
18581858

1859-
return DataFile.from_args(
1859+
data_file = DataFile.from_args(
18601860
content=CONTENT_TYPE_MAP[rest_file.content],
18611861
file_path=rest_file.file_path,
18621862
file_format=rest_file.file_format,
@@ -1869,8 +1869,9 @@ def _rest_file_to_data_file(rest_file: RESTContentFile, *, include_stats: bool)
18691869
nan_value_counts=nan_value_counts.to_dict() if include_stats and nan_value_counts else None,
18701870
split_offsets=rest_file.split_offsets,
18711871
sort_order_id=rest_file.sort_order_id,
1872-
spec_id=rest_file.spec_id,
18731872
)
1873+
data_file.spec_id = rest_file.spec_id
1874+
return data_file
18741875

18751876

18761877
def _open_manifest(

0 commit comments

Comments (0)