Skip to content

Commit 53fc25b

Browse files
fix: Prefer remote schema instead of throwing on materialize conflicts (#1644)
1 parent 48d10d1 commit 53fc25b

File tree

2 files changed

+21
-4
lines changed

2 files changed

+21
-4
lines changed

bigframes/session/executor.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import abc
1818
import dataclasses
19+
import itertools
1920
from typing import Callable, Iterator, Literal, Mapping, Optional, Sequence, Union
2021

2122
from google.cloud import bigquery
@@ -37,10 +38,16 @@ def to_arrow_table(self) -> pyarrow.Table:
3738
# Need to provide schema if no result rows, as arrow can't infer
3839
# If there are rows, it is safest to infer schema from batches.
3940
# Any discrepancies between predicted schema and actual schema will produce errors.
40-
return pyarrow.Table.from_batches(
41-
self.arrow_batches(),
42-
self.schema.to_pyarrow() if not self.total_rows else None,
43-
)
41+
batches = iter(self.arrow_batches())
42+
peek_it = itertools.islice(batches, 0, 1)
43+
peek_value = list(peek_it)
44+
# TODO: Enforce our internal schema on the table for consistency
45+
if len(peek_value) > 0:
46+
return pyarrow.Table.from_batches(
47+
itertools.chain(peek_value, batches), # reconstruct
48+
)
49+
else:
50+
return self.schema.to_pyarrow().empty_table()
4451

4552

4653
class Executor(abc.ABC):

tests/system/small/test_dataframe.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5283,6 +5283,16 @@ def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_cre
52835283
assert not loaded_scalars_df_index.empty
52845284

52855285

5286+
def test_read_gbq_to_pandas_no_exec(unordered_session: bigframes.Session):
5287+
metrics = unordered_session._metrics
5288+
execs_pre = metrics.execution_count
5289+
df = unordered_session.read_gbq("bigquery-public-data.ml_datasets.penguins")
5290+
df.to_pandas()
5291+
execs_post = metrics.execution_count
5292+
assert df.shape == (344, 7)
5293+
assert execs_pre == execs_post
5294+
5295+
52865296
def test_to_gbq_table_labels(scalars_df_index):
52875297
destination_table = "bigframes-dev.bigframes_tests_sys.table_labels"
52885298
result_table = scalars_df_index.to_gbq(

0 commit comments

Comments
 (0)