Skip to content

Commit 76db295

Browse files
authored
feat: add bigquery.create_external_table method (#2415)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> 🦕
1 parent fcb5bc1 commit 76db295

File tree

5 files changed

+307
-0
lines changed

5 files changed

+307
-0
lines changed

bigframes/bigquery/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
from bigframes.bigquery._operations.search import create_vector_index, vector_search
6161
from bigframes.bigquery._operations.sql import sql_scalar
6262
from bigframes.bigquery._operations.struct import struct
63+
from bigframes.bigquery.table import create_external_table
6364
from bigframes.core.logging import log_adapter
6465

6566
_functions = [
@@ -104,6 +105,8 @@
104105
sql_scalar,
105106
# struct ops
106107
struct,
108+
# table ops
109+
create_external_table,
107110
]
108111

109112
_module = sys.modules[__name__]
@@ -155,6 +158,8 @@
155158
"sql_scalar",
156159
# struct ops
157160
"struct",
161+
# table ops
162+
"create_external_table",
158163
# Modules / SQL namespaces
159164
"ai",
160165
"ml",

bigframes/bigquery/table.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from typing import Mapping, Optional, Union
18+
19+
import bigframes_vendored.constants
20+
import google.cloud.bigquery
21+
import pandas as pd
22+
23+
import bigframes.core.logging.log_adapter as log_adapter
24+
import bigframes.core.sql.table
25+
import bigframes.session
26+
27+
28+
def _get_table_metadata(
    *,
    bqclient: google.cloud.bigquery.Client,
    table_name: str,
) -> pd.Series:
    """Look up a table and return its API representation as a Series.

    Args:
        bqclient (google.cloud.bigquery.Client):
            Client used to fetch the table.
        table_name (str):
            Table identifier handed unchanged to ``bqclient.get_table``.

    Returns:
        pandas.Series:
            A Series built from the dictionary that ``to_api_repr()`` returns
            for the fetched table.
    """
    api_repr = bqclient.get_table(table_name).to_api_repr()
    return pd.Series(api_repr)
36+
37+
38+
@log_adapter.method_logger(custom_base_name="bigquery_table")
def create_external_table(
    table_name: str,
    *,
    replace: bool = False,
    if_not_exists: bool = False,
    columns: Optional[Mapping[str, str]] = None,
    partition_columns: Optional[Mapping[str, str]] = None,
    connection_name: Optional[str] = None,
    options: Mapping[str, Union[str, int, float, bool, list]],
    session: Optional[bigframes.session.Session] = None,
) -> pd.Series:
    """
    Creates a BigQuery external table and returns its metadata.

    The arguments are rendered into a ``CREATE EXTERNAL TABLE`` statement,
    the statement is run through ``read_gbq_query``, and the resulting table
    is then looked up with the session's BigQuery client.

    See the `BigQuery CREATE EXTERNAL TABLE DDL syntax
    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_external_table_statement>`_
    for additional reference.

    Args:
        table_name (str):
            The name of the table in BigQuery.
        replace (bool, default False):
            Whether to replace the table if it already exists.
        if_not_exists (bool, default False):
            Whether to ignore the error if the table already exists.
        columns (Mapping[str, str], optional):
            The table's schema, as a mapping of column name to SQL type.
        partition_columns (Mapping[str, str], optional):
            The table's partition columns, as a mapping of name to SQL type.
        connection_name (str, optional):
            The connection to use for the table.
        options (Mapping[str, Union[str, int, float, bool, list]]):
            The OPTIONS clause, which specifies the table options.
        session (bigframes.session.Session, optional):
            The session to use. If not provided, the default session is used.

    Returns:
        pandas.Series:
            A Series with object dtype containing the table metadata. Reference
            the `BigQuery Table REST API reference
            <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table>`_
            for available fields.
    """
    import bigframes.pandas as bpd

    ddl = bigframes.core.sql.table.create_external_table_ddl(
        table_name=table_name,
        replace=replace,
        if_not_exists=if_not_exists,
        columns=columns,
        partition_columns=partition_columns,
        connection_name=connection_name,
        options=options,
    )

    if session is not None:
        # An explicit session was supplied: run the DDL on it directly.
        session.read_gbq_query(ddl)
    else:
        # No session given: run through the module-level API first, then pick
        # up the global session so its client can be used for the lookup.
        bpd.read_gbq_query(ddl)
        session = bpd.get_global_session()
        assert (
            session is not None
        ), f"Missing connection to BigQuery. Please report how you encountered this error at {bigframes_vendored.constants.FEEDBACK_LINK}."

    return _get_table_metadata(bqclient=session.bqclient, table_name=table_name)

bigframes/core/sql/table.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from typing import Mapping, Optional, Union
18+
19+
20+
def create_external_table_ddl(
    table_name: str,
    *,
    replace: bool = False,
    if_not_exists: bool = False,
    columns: Optional[Mapping[str, str]] = None,
    partition_columns: Optional[Mapping[str, str]] = None,
    connection_name: Optional[str] = None,
    options: Mapping[str, Union[str, int, float, bool, list]],
) -> str:
    """Generates the CREATE EXTERNAL TABLE DDL statement.

    Clauses are emitted in this order: ``CREATE [OR REPLACE] EXTERNAL TABLE
    [IF NOT EXISTS] <table_name>``, the optional column list, the optional
    ``WITH CONNECTION`` clause, the optional ``WITH PARTITION COLUMNS``
    clause, and finally ``OPTIONS (...)``.

    Args:
        table_name (str):
            Table identifier, inserted into the statement verbatim (it is
            not quoted or escaped here).
        replace (bool, default False):
            Emit ``OR REPLACE``.
        if_not_exists (bool, default False):
            Emit ``IF NOT EXISTS``.
        columns (Mapping[str, str], optional):
            Column name to SQL type. Names and types are inserted verbatim.
        partition_columns (Mapping[str, str], optional):
            Partition column name to SQL type, inserted verbatim.
        connection_name (str, optional):
            Connection identifier; it is wrapped in backticks.
        options (Mapping[str, Union[str, int, float, bool, list]]):
            Table options. Keys are inserted verbatim; values are rendered
            as SQL literals. An empty mapping omits the OPTIONS clause.

    Returns:
        str: The DDL statement as a single line of SQL.

    Raises:
        ValueError: If both ``replace`` and ``if_not_exists`` are true.
    """
    # BigQuery's DDL grammar does not accept OR REPLACE together with
    # IF NOT EXISTS, so fail fast instead of sending a statement that the
    # service is documented to reject.
    if replace and if_not_exists:
        raise ValueError(
            "'replace' and 'if_not_exists' are mutually exclusive: "
            "CREATE OR REPLACE EXTERNAL TABLE cannot be combined with "
            "IF NOT EXISTS."
        )

    statement = ["CREATE"]
    if replace:
        statement.append("OR REPLACE")
    statement.append("EXTERNAL TABLE")
    if if_not_exists:
        statement.append("IF NOT EXISTS")
    statement.append(table_name)

    if columns:
        column_defs = ", ".join(f"{name} {typ}" for name, typ in columns.items())
        statement.append(f"({column_defs})")

    if connection_name:
        statement.append(f"WITH CONNECTION `{connection_name}`")

    if partition_columns:
        part_defs = ", ".join(
            f"{name} {typ}" for name, typ in partition_columns.items()
        )
        statement.append(f"WITH PARTITION COLUMNS ({part_defs})")

    if options:
        options_str = ", ".join(
            f"{key} = {_option_value_sql(value)}" for key, value in options.items()
        )
        statement.append(f"OPTIONS ({options_str})")

    return " ".join(statement)


def _option_value_sql(value) -> str:
    """Render one OPTIONS value (or one list element) as a SQL literal."""
    if isinstance(value, str):
        # repr() produces a quoted literal with backslash escapes.
        return repr(value)
    # bool is a subclass of int, so it must be handled before the numeric
    # fallthrough or it would render as "True"/"False".
    if isinstance(value, bool):
        return str(value).upper()
    if isinstance(value, list):
        # Elements use the same rules as scalars so that, for example, a
        # bool inside a list is rendered the same way as a scalar bool.
        elements = ", ".join(_option_value_sql(item) for item in value)
        return f"[{elements}]"
    return f"{value}"
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import bigframes.bigquery as bbq
16+
17+
18+
def test_create_external_table(session, dataset_id, bq_connection):
    """Create an external table over a GCS URI pattern and read it back.

    The table is created with ``object_metadata = 'SIMPLE'`` through the
    supplied connection, then read with ``bpd.read_gbq`` to check that it
    returns at least one row.
    """
    source_uri = "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*"
    object_table = f"{dataset_id}.test_object_table"

    # Create the external table
    metadata = bbq.create_external_table(
        object_table,
        connection_name=bq_connection,
        options={"object_metadata": "SIMPLE", "uris": [source_uri]},
        session=session,
    )
    assert metadata is not None

    # Read the table to verify
    import bigframes.pandas as bpd

    rows = bpd.read_gbq(object_table).to_pandas()
    assert len(rows) > 0

tests/unit/bigquery/test_table.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from unittest import mock
16+
17+
import pytest
18+
19+
import bigframes.bigquery.table
20+
import bigframes.core.sql.table
21+
import bigframes.session
22+
23+
24+
@pytest.fixture
def mock_session():
    """Provide an autospec'd stand-in for ``bigframes.session.Session``."""
    session_double = mock.create_autospec(spec=bigframes.session.Session)
    return session_double
27+
28+
29+
def test_create_external_table_ddl():
    """Columns plus options render a plain CREATE EXTERNAL TABLE statement."""
    expected_ddl = "CREATE EXTERNAL TABLE my-project.my_dataset.my_table (col1 INT64, col2 STRING) OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    schema = {"col1": "INT64", "col2": "STRING"}
    table_options = {"format": "CSV", "uris": ["gs://bucket/path*"]}

    actual_ddl = bigframes.core.sql.table.create_external_table_ddl(
        "my-project.my_dataset.my_table",
        columns=schema,
        options=table_options,
    )

    assert actual_ddl == expected_ddl
37+
38+
39+
def test_create_external_table_ddl_replace():
    """``replace=True`` inserts OR REPLACE right after CREATE."""
    expected_ddl = "CREATE OR REPLACE EXTERNAL TABLE my-project.my_dataset.my_table (col1 INT64, col2 STRING) OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    schema = {"col1": "INT64", "col2": "STRING"}
    table_options = {"format": "CSV", "uris": ["gs://bucket/path*"]}

    actual_ddl = bigframes.core.sql.table.create_external_table_ddl(
        "my-project.my_dataset.my_table",
        replace=True,
        columns=schema,
        options=table_options,
    )

    assert actual_ddl == expected_ddl
48+
49+
50+
def test_create_external_table_ddl_if_not_exists():
    """``if_not_exists=True`` inserts IF NOT EXISTS before the table name."""
    expected_ddl = "CREATE EXTERNAL TABLE IF NOT EXISTS my-project.my_dataset.my_table (col1 INT64, col2 STRING) OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    schema = {"col1": "INT64", "col2": "STRING"}
    table_options = {"format": "CSV", "uris": ["gs://bucket/path*"]}

    actual_ddl = bigframes.core.sql.table.create_external_table_ddl(
        "my-project.my_dataset.my_table",
        if_not_exists=True,
        columns=schema,
        options=table_options,
    )

    assert actual_ddl == expected_ddl
59+
60+
61+
def test_create_external_table_ddl_partition_columns():
    """Partition columns render as WITH PARTITION COLUMNS before OPTIONS."""
    expected_ddl = "CREATE EXTERNAL TABLE my-project.my_dataset.my_table (col1 INT64, col2 STRING) WITH PARTITION COLUMNS (part1 DATE, part2 STRING) OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    schema = {"col1": "INT64", "col2": "STRING"}
    partitions = {"part1": "DATE", "part2": "STRING"}
    table_options = {"format": "CSV", "uris": ["gs://bucket/path*"]}

    actual_ddl = bigframes.core.sql.table.create_external_table_ddl(
        "my-project.my_dataset.my_table",
        columns=schema,
        partition_columns=partitions,
        options=table_options,
    )

    assert actual_ddl == expected_ddl
70+
71+
72+
def test_create_external_table_ddl_connection():
    """A connection name renders as a backtick-quoted WITH CONNECTION clause."""
    expected_ddl = "CREATE EXTERNAL TABLE my-project.my_dataset.my_table (col1 INT64, col2 STRING) WITH CONNECTION `my-connection` OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"
    schema = {"col1": "INT64", "col2": "STRING"}
    table_options = {"format": "CSV", "uris": ["gs://bucket/path*"]}

    actual_ddl = bigframes.core.sql.table.create_external_table_ddl(
        "my-project.my_dataset.my_table",
        columns=schema,
        connection_name="my-connection",
        options=table_options,
    )

    assert actual_ddl == expected_ddl
81+
82+
83+
@mock.patch("bigframes.bigquery.table._get_table_metadata")
def test_create_external_table(get_table_metadata_mock, mock_session):
    """The public entry point runs the generated DDL on the given session.

    ``_get_table_metadata`` is patched out, so the test only checks that the
    session received exactly one query with the expected SQL and that the
    metadata lookup was invoked once.
    """
    expected_ddl = "CREATE EXTERNAL TABLE my-project.my_dataset.my_table (col1 INT64, col2 STRING) OPTIONS (format = 'CSV', uris = ['gs://bucket/path*'])"

    bigframes.bigquery.table.create_external_table(
        "my-project.my_dataset.my_table",
        columns={"col1": "INT64", "col2": "STRING"},
        options={"format": "CSV", "uris": ["gs://bucket/path*"]},
        session=mock_session,
    )

    run_query = mock_session.read_gbq_query
    run_query.assert_called_once()
    # call_args[0] is the tuple of positional arguments; the SQL is the first.
    positional_args = run_query.call_args[0]
    assert positional_args[0] == expected_ddl
    get_table_metadata_mock.assert_called_once()

0 commit comments

Comments
 (0)