-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Optionally include indexes in table written by `load_table_from_dataf…
…rame`. (#9084) * Specify the index data type in partial schema to `load_table_from_dataframe` to include it. If an index (or level of a multi-index) has a name and is present in the schema passed to `load_table_from_dataframe`, then that index will be serialized and written to the table. Otherwise, the index is omitted from the serialized table. * Don't include index if has same name as column name. * Move `load_table_dataframe` sample from `snippets.py` to `samples/`. Sample now demonstrates how to manually include the index with a partial schema definition. Update docs reference to new `load_table_dataframe` sample location.
- Loading branch information
Showing
7 changed files
with
421 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
def load_table_dataframe(client, table_id): | ||
# [START bigquery_load_table_dataframe] | ||
from google.cloud import bigquery | ||
import pandas | ||
|
||
# TODO(developer): Construct a BigQuery client object. | ||
# client = bigquery.Client() | ||
|
||
# TODO(developer): Set table_id to the ID of the table to create. | ||
# table_id = "your-project.your_dataset.your_table_name" | ||
|
||
records = [ | ||
{"title": u"The Meaning of Life", "release_year": 1983}, | ||
{"title": u"Monty Python and the Holy Grail", "release_year": 1975}, | ||
{"title": u"Life of Brian", "release_year": 1979}, | ||
{"title": u"And Now for Something Completely Different", "release_year": 1971}, | ||
] | ||
dataframe = pandas.DataFrame( | ||
records, | ||
# In the loaded table, the column order reflects the order of the | ||
# columns in the DataFrame. | ||
columns=["title", "release_year"], | ||
# Optionally, set a named index, which can also be written to the | ||
# BigQuery table. | ||
index=pandas.Index( | ||
[u"Q24980", u"Q25043", u"Q24953", u"Q16403"], name="wikidata_id" | ||
), | ||
) | ||
job_config = bigquery.LoadJobConfig( | ||
# Specify a (partial) schema. All columns are always written to the | ||
# table. The schema is used to assist in data type definitions. | ||
schema=[ | ||
# Specify the type of columns whose type cannot be auto-detected. For | ||
# example the "title" column uses pandas dtype "object", so its | ||
# data type is ambiguous. | ||
bigquery.SchemaField("title", bigquery.enums.SqlTypeNames.STRING), | ||
# Indexes are written if included in the schema by name. | ||
bigquery.SchemaField("wikidata_id", bigquery.enums.SqlTypeNames.STRING), | ||
], | ||
# Optionally, set the write disposition. BigQuery appends loaded rows | ||
# to an existing table by default, but with WRITE_TRUNCATE write | ||
# disposition it replaces the table with the loaded data. | ||
write_disposition="WRITE_TRUNCATE", | ||
) | ||
|
||
job = client.load_table_from_dataframe( | ||
dataframe, table_id, job_config=job_config, location="US" | ||
) | ||
job.result() # Waits for table load to complete. | ||
|
||
table = client.get_table(table_id) | ||
print( | ||
"Loaded {} rows and {} columns to {}".format( | ||
table.num_rows, len(table.schema), table_id | ||
) | ||
) | ||
# [END bigquery_load_table_dataframe] | ||
return table |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import pytest | ||
|
||
from .. import load_table_dataframe | ||
|
||
|
||
pytest.importorskip("pandas") | ||
pytest.importorskip("pyarrow") | ||
|
||
|
||
def test_load_table_dataframe(capsys, client, random_table_id): | ||
table = load_table_dataframe.load_table_dataframe(client, random_table_id) | ||
out, _ = capsys.readouterr() | ||
assert "Loaded 4 rows and 3 columns" in out | ||
|
||
column_names = [field.name for field in table.schema] | ||
assert column_names == ["wikidata_id", "title", "release_year"] |
Oops, something went wrong.