-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(bigquery): add support for sheets ranges #9416
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
def query_external_sheets_permanent_table(dataset_id): | ||
|
||
# [START bigquery_query_external_sheets_perm] | ||
from google.cloud import bigquery | ||
import google.auth | ||
|
||
# Create credentials with Drive & BigQuery API scopes. | ||
# Both APIs must be enabled for your project before running this code. | ||
credentials, project = google.auth.default( | ||
scopes=[ | ||
"https://www.googleapis.com/auth/drive", | ||
"https://www.googleapis.com/auth/bigquery", | ||
] | ||
) | ||
|
||
# TODO(developer): Construct a BigQuery client object. | ||
client = bigquery.Client(credentials=credentials, project=project) | ||
|
||
# TODO(developer): Set dataset_id to the ID of the dataset to fetch. | ||
# dataset_id = "your-project.your_dataset" | ||
|
||
# Configure the external data source. | ||
dataset = client.get_dataset(dataset_id) | ||
table_id = "us_states" | ||
schema = [ | ||
bigquery.SchemaField("name", "STRING"), | ||
bigquery.SchemaField("post_abbr", "STRING"), | ||
] | ||
table = bigquery.Table(dataset.table(table_id), schema=schema) | ||
external_config = bigquery.ExternalConfig("GOOGLE_SHEETS") | ||
# Use a shareable link or grant viewing access to the email address you | ||
# used to authenticate with BigQuery (this example Sheet is public). | ||
sheet_url = ( | ||
"https://docs.google.com/spreadsheets/" | ||
"d/1dCG0rrY0nkJpB8t6Ko1S3tY7w9-hp0e_jsYer4LgEuA/edit?usp=sharing" | ||
) | ||
external_config.source_uris = [sheet_url] | ||
external_config.options.skip_leading_rows = 1 # Optionally skip header row. | ||
external_config.options.range = ( | ||
"Sheet1!A10:B30" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's use a range that is more possible to verify the correct behavior. Since the original sheet is sorted alphabetically, the range There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I decided to use |
||
) # Optionally set range of the sheet to query from. | ||
table.external_data_configuration = external_config | ||
|
||
# Create a permanent table linked to the Sheets file. | ||
table = client.create_table(table) # Make an API request. | ||
|
||
# Example query to find states starting with "W". | ||
sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id) | ||
query_job = client.query(sql) # Make an API request. | ||
|
||
# Wait for the query to complete. | ||
w_states = list(query_job) | ||
print("There are {} states with names starting with W.".format(len(w_states))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's update the print statement to indicate that we used a range: |
||
# [END bigquery_query_external_sheets_perm] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
def query_external_sheets_temporary_table(): | ||
|
||
# [START bigquery_query_external_sheets_temp] | ||
# [START bigquery_auth_drive_scope] | ||
from google.cloud import bigquery | ||
import google.auth | ||
|
||
# Create credentials with Drive & BigQuery API scopes. | ||
# Both APIs must be enabled for your project before running this code. | ||
credentials, project = google.auth.default( | ||
scopes=[ | ||
"https://www.googleapis.com/auth/drive", | ||
"https://www.googleapis.com/auth/bigquery", | ||
] | ||
) | ||
|
||
# TODO(developer): Construct a BigQuery client object. | ||
client = bigquery.Client(credentials=credentials, project=project) | ||
# [END bigquery_auth_drive_scope] | ||
|
||
# Configure the external data source and query job. | ||
external_config = bigquery.ExternalConfig("GOOGLE_SHEETS") | ||
|
||
# Use a shareable link or grant viewing access to the email address you | ||
# used to authenticate with BigQuery (this example Sheet is public). | ||
sheet_url = ( | ||
"https://docs.google.com/spreadsheets/" | ||
"d/1dCG0rrY0nkJpB8t6Ko1S3tY7w9-hp0e_jsYer4LgEuA/edit?usp=sharing" | ||
) | ||
external_config.source_uris = [sheet_url] | ||
external_config.schema = [ | ||
bigquery.SchemaField("name", "STRING"), | ||
bigquery.SchemaField("post_abbr", "STRING"), | ||
] | ||
external_config.options.skip_leading_rows = 1 # Optionally skip header row. | ||
external_config.options.range = ( | ||
"Sheet1!A10:B30" | ||
) # Optionally set range of the sheet to query from. | ||
table_id = "us_states" | ||
job_config = bigquery.QueryJobConfig() | ||
job_config.table_definitions = {table_id: external_config} | ||
|
||
# Example query to find states starting with "W". | ||
sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id) | ||
query_job = client.query(sql, job_config=job_config) # Make an API request. | ||
|
||
# Wait for the query to complete. | ||
w_states = list(query_job) | ||
print("There are {} states with names starting with W.".format(len(w_states))) | ||
# [END bigquery_query_external_sheets_temp] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
from .. import query_external_sheets_permanent_table | ||
|
||
|
||
def test_query_external_sheets_permanent_table(capsys, dataset_id): | ||
|
||
query_external_sheets_permanent_table.query_external_sheets_permanent_table( | ||
dataset_id | ||
) | ||
out, err = capsys.readouterr() | ||
assert "There are 4 states with names starting with W." in out |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
from .. import query_external_sheets_temporary_table | ||
|
||
|
||
def test_query_external_sheets_temporary_table(capsys): | ||
|
||
query_external_sheets_temporary_table.query_external_sheets_temporary_table() | ||
out, err = capsys.readouterr() | ||
assert "There are 4 states with names starting with W." in out |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, let's continue using the previous sheet. https://docs.google.com/spreadsheets/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing
I was thinking we'd want a different set of data for the range queries, but if we're still querying US States, we should just use the existing sheet.