Skip to content

Commit 3e8fbae

Browse files
authored
docs(bigquery): document how to achieve higher write limit and add tests (#9574)
* test(bigquery): add insert_rows*() tests w/o row IDs * Groom the insert_rows_json() method's docstring * docs: document how to achieve higher insert write limit * Make method names less confusing for insert IDs
1 parent 48359eb commit 3e8fbae

File tree

5 files changed

+206
-16
lines changed

5 files changed

+206
-16
lines changed

bigquery/docs/usage/tables.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,20 @@ Insert rows into a table's data with the
122122
:start-after: [START bigquery_table_insert_rows]
123123
:end-before: [END bigquery_table_insert_rows]
124124

125+
Insert rows into a table's data with the
126+
:func:`~google.cloud.bigquery.client.Client.insert_rows` method, achieving
127+
a higher write limit:
128+
129+
.. literalinclude:: ../samples/table_insert_rows_explicit_none_insert_ids.py
130+
:language: python
131+
:dedent: 4
132+
:start-after: [START bigquery_table_insert_rows_explicit_none_insert_ids]
133+
:end-before: [END bigquery_table_insert_rows_explicit_none_insert_ids]
134+
135+
Note that inserting data with ``None`` row insert IDs can come at the expense of
136+
more duplicate inserts. See also:
137+
`Streaming inserts <https://cloud.google.com/bigquery/quotas#streaming_inserts>`_.
138+
125139
Add an empty column to the existing table with the
126140
:func:`~google.cloud.bigquery.update_table` method:
127141

bigquery/google/cloud/bigquery/client.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2264,29 +2264,32 @@ def insert_rows_json(
22642264
table (Union[ \
22652265
google.cloud.bigquery.table.Table \
22662266
google.cloud.bigquery.table.TableReference, \
2267-
str, \
2267+
str \
22682268
]):
22692269
The destination table for the row data, or a reference to it.
22702270
json_rows (Sequence[Dict]):
22712271
Row data to be inserted. Keys must match the table schema fields
22722272
and values must be JSON-compatible representations.
2273-
row_ids (Sequence[str]):
2274-
(Optional) Unique ids, one per row being inserted. If omitted,
2275-
unique IDs are created.
2276-
skip_invalid_rows (bool):
2277-
(Optional) Insert all valid rows of a request, even if invalid
2278-
rows exist. The default value is False, which causes the entire
2279-
request to fail if any invalid rows exist.
2280-
ignore_unknown_values (bool):
2281-
(Optional) Accept rows that contain values that do not match the
2282-
schema. The unknown values are ignored. Default is False, which
2273+
row_ids (Optional[Sequence[Optional[str]]]):
2274+
Unique IDs, one per row being inserted. An ID can also be
2275+
``None``, indicating that an explicit insert ID should **not**
2276+
be used for that row. If the argument is omitted altogether,
2277+
unique IDs are created automatically.
2278+
skip_invalid_rows (Optional[bool]):
2279+
Insert all valid rows of a request, even if invalid rows exist.
2280+
The default value is ``False``, which causes the entire request
2281+
to fail if any invalid rows exist.
2282+
ignore_unknown_values (Optional[bool]):
2283+
Accept rows that contain values that do not match the schema.
2284+
The unknown values are ignored. Default is ``False``, which
22832285
treats unknown values as errors.
2284-
template_suffix (str):
2285-
(Optional) treat ``name`` as a template table and provide a suffix.
2286-
BigQuery will create the table ``<name> + <template_suffix>`` based
2287-
on the schema of the template table. See
2286+
template_suffix (Optional[str]):
2287+
Treat ``name`` as a template table and provide a suffix.
2288+
BigQuery will create the table ``<name> + <template_suffix>``
2289+
based on the schema of the template table. See
22882290
https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables
2289-
retry (google.api_core.retry.Retry): (Optional) How to retry the RPC.
2291+
retry (Optional[google.api_core.retry.Retry]):
2292+
How to retry the RPC.
22902293
22912294
Returns:
22922295
Sequence[Mappings]:
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
def table_insert_rows_explicit_none_insert_ids(client, table_id):
17+
18+
# [START bigquery_table_insert_rows_explicit_none_insert_ids]
19+
# TODO(developer): Import the client library.
20+
# from google.cloud import bigquery
21+
22+
# TODO(developer): Construct a BigQuery client object.
23+
# client = bigquery.Client()
24+
25+
# TODO(developer): Set table_id to the ID of the table to fetch.
26+
# table_id = "your-project.your_dataset.your_table"
27+
28+
table = client.get_table(table_id) # Make an API request.
29+
rows_to_insert = [(u"Phred Phlyntstone", 32), (u"Wylma Phlyntstone", 29)]
30+
31+
errors = client.insert_rows(
32+
table, rows_to_insert, row_ids=[None] * len(rows_to_insert)
33+
) # Make an API request.
34+
if errors == []:
35+
print("New rows have been added.")
36+
# [END bigquery_table_insert_rows_explicit_none_insert_ids]
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
from google.cloud import bigquery
17+
18+
from .. import table_insert_rows_explicit_none_insert_ids as mut
19+
20+
21+
def test_table_insert_rows_explicit_none_insert_ids(capsys, client, random_table_id):
22+
23+
schema = [
24+
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
25+
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
26+
]
27+
28+
table = bigquery.Table(random_table_id, schema=schema)
29+
table = client.create_table(table)
30+
31+
mut.table_insert_rows_explicit_none_insert_ids(client, random_table_id)
32+
out, err = capsys.readouterr()
33+
assert "New rows have been added." in out

bigquery/tests/unit/test_client.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4572,6 +4572,40 @@ def test_insert_rows_w_record_schema(self):
45724572
method="POST", path="/%s" % PATH, data=SENT
45734573
)
45744574

4575+
def test_insert_rows_w_explicit_none_insert_ids(self):
4576+
from google.cloud.bigquery.schema import SchemaField
4577+
from google.cloud.bigquery.table import Table
4578+
4579+
PATH = "projects/{}/datasets/{}/tables/{}/insertAll".format(
4580+
self.PROJECT, self.DS_ID, self.TABLE_ID,
4581+
)
4582+
creds = _make_credentials()
4583+
http = object()
4584+
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
4585+
conn = client._connection = make_connection({})
4586+
schema = [
4587+
SchemaField("full_name", "STRING", mode="REQUIRED"),
4588+
SchemaField("age", "INTEGER", mode="REQUIRED"),
4589+
]
4590+
table = Table(self.TABLE_REF, schema=schema)
4591+
ROWS = [
4592+
{"full_name": "Phred Phlyntstone", "age": 32},
4593+
{"full_name": "Bharney Rhubble", "age": 33},
4594+
]
4595+
4596+
def _row_data(row):
4597+
row["age"] = str(row["age"])
4598+
return row
4599+
4600+
SENT = {"rows": [{"json": _row_data(row), "insertId": None} for row in ROWS]}
4601+
4602+
errors = client.insert_rows(table, ROWS, row_ids=[None] * len(ROWS))
4603+
4604+
self.assertEqual(len(errors), 0)
4605+
conn.api_request.assert_called_once_with(
4606+
method="POST", path="/{}".format(PATH), data=SENT
4607+
)
4608+
45754609
def test_insert_rows_errors(self):
45764610
from google.cloud.bigquery.table import Table
45774611

@@ -4765,6 +4799,55 @@ def test_insert_rows_from_dataframe_many_columns(self):
47654799
assert len(actual_calls) == 1
47664800
assert actual_calls[0] == expected_call
47674801

4802+
@unittest.skipIf(pandas is None, "Requires `pandas`")
4803+
def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self):
4804+
from google.cloud.bigquery.table import SchemaField
4805+
from google.cloud.bigquery.table import Table
4806+
4807+
API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format(
4808+
self.PROJECT, self.DS_ID, self.TABLE_REF.table_id
4809+
)
4810+
4811+
dataframe = pandas.DataFrame(
4812+
[
4813+
{"name": u"Little One", "adult": False},
4814+
{"name": u"Young Gun", "adult": True},
4815+
]
4816+
)
4817+
4818+
# create client
4819+
creds = _make_credentials()
4820+
http = object()
4821+
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
4822+
conn = client._connection = make_connection({}, {})
4823+
4824+
# create table
4825+
schema = [
4826+
SchemaField("name", "STRING", mode="REQUIRED"),
4827+
SchemaField("adult", "BOOLEAN", mode="REQUIRED"),
4828+
]
4829+
table = Table(self.TABLE_REF, schema=schema)
4830+
4831+
error_info = client.insert_rows_from_dataframe(
4832+
table, dataframe, row_ids=[None] * len(dataframe)
4833+
)
4834+
4835+
self.assertEqual(len(error_info), 1)
4836+
assert error_info[0] == [] # no chunk errors
4837+
4838+
EXPECTED_SENT_DATA = {
4839+
"rows": [
4840+
{"insertId": None, "json": {"name": "Little One", "adult": "false"}},
4841+
{"insertId": None, "json": {"name": "Young Gun", "adult": "true"}},
4842+
]
4843+
}
4844+
4845+
actual_calls = conn.api_request.call_args_list
4846+
assert len(actual_calls) == 1
4847+
assert actual_calls[0] == mock.call(
4848+
method="POST", path=API_PATH, data=EXPECTED_SENT_DATA
4849+
)
4850+
47684851
def test_insert_rows_json(self):
47694852
from google.cloud.bigquery.table import Table, SchemaField
47704853
from google.cloud.bigquery.dataset import DatasetReference
@@ -4833,6 +4916,27 @@ def test_insert_rows_json_with_string_id(self):
48334916
data=expected,
48344917
)
48354918

4919+
def test_insert_rows_json_w_explicit_none_insert_ids(self):
4920+
rows = [{"col1": "val1"}, {"col2": "val2"}]
4921+
creds = _make_credentials()
4922+
http = object()
4923+
client = self._make_one(
4924+
project="default-project", credentials=creds, _http=http
4925+
)
4926+
conn = client._connection = make_connection({})
4927+
4928+
errors = client.insert_rows_json(
4929+
"proj.dset.tbl", rows, row_ids=[None] * len(rows),
4930+
)
4931+
4932+
self.assertEqual(len(errors), 0)
4933+
expected = {"rows": [{"json": row, "insertId": None} for row in rows]}
4934+
conn.api_request.assert_called_once_with(
4935+
method="POST",
4936+
path="/projects/proj/datasets/dset/tables/tbl/insertAll",
4937+
data=expected,
4938+
)
4939+
48364940
def test_list_partitions(self):
48374941
from google.cloud.bigquery.table import Table
48384942

0 commit comments

Comments
 (0)