BigQuery: add avro load samples #6832

Merged: 2 commits, Dec 3, 2018

Changes from all commits
47 changes: 44 additions & 3 deletions bigquery/docs/snippets.py
@@ -1313,6 +1313,39 @@ def test_load_table_from_file(client, to_delete):
    assert row2 in rows


def test_load_table_from_uri_avro(client, to_delete, capsys):
    dataset_id = 'load_table_from_uri_avro_{}'.format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_load_table_gcs_avro]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bigquery.SourceFormat.AVRO
    uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.avro'

    load_job = client.load_table_from_uri(
        uri,
        dataset_ref.table('us_states'),
        job_config=job_config)  # API request
    print('Starting job {}'.format(load_job.job_id))

    load_job.result()  # Waits for table load to complete.
    print('Job finished.')

    destination_table = client.get_table(dataset_ref.table('us_states'))
    print('Loaded {} rows.'.format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_avro]

    out, _ = capsys.readouterr()
    assert 'Loaded 50 rows.' in out


def test_load_table_from_uri_csv(client, to_delete, capsys):
    dataset_id = "load_table_from_uri_csv_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
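The code between the [START bigquery_load_table_gcs_avro] and [END] tags above is what the docs pipeline extracts. Lifted out of the test harness, it reads roughly as the sketch below; it assumes application default credentials and that a dataset named 'my_dataset' (the placeholder from the snippet's comments) already exists. Because Avro files embed their own schema, only the source format needs to be configured on the load job.

from google.cloud import bigquery

client = bigquery.Client()
dataset_ref = client.dataset('my_dataset')  # assumes this dataset exists

# Avro is self-describing, so no explicit schema is supplied.
job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.AVRO

uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.avro'
load_job = client.load_table_from_uri(
    uri,
    dataset_ref.table('us_states'),
    job_config=job_config)  # API request
print('Starting job {}'.format(load_job.job_id))

load_job.result()  # Waits for table load to complete.

destination_table = client.get_table(dataset_ref.table('us_states'))
print('Loaded {} rows.'.format(destination_table.num_rows))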
@@ -1588,8 +1621,11 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys):
    table_ref = dataset.table("us_states")
    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_ref, job_config=job_config).result()
    previous_rows = client.get_table(table_ref).num_rows
    assert previous_rows > 0

    # Shared code
    # [START bigquery_load_table_gcs_avro_truncate]
    # [START bigquery_load_table_gcs_csv_truncate]
    # [START bigquery_load_table_gcs_json_truncate]
    # [START bigquery_load_table_gcs_parquet_truncate]
@@ -1598,17 +1634,20 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys):
    # client = bigquery.Client()
    # table_ref = client.dataset('my_dataset').table('existing_table')

    previous_rows = client.get_table(table_ref).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    # [END bigquery_load_table_gcs_avro_truncate]
    # [END bigquery_load_table_gcs_csv_truncate]
    # [END bigquery_load_table_gcs_json_truncate]
    # [END bigquery_load_table_gcs_parquet_truncate]
    # [END bigquery_load_table_gcs_orc_truncate]

    # Format-specific code
    # [START bigquery_load_table_gcs_avro_truncate]
    job_config.source_format = bigquery.SourceFormat.AVRO
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro"
    # [END bigquery_load_table_gcs_avro_truncate]

    # [START bigquery_load_table_gcs_csv_truncate]
    job_config.skip_leading_rows = 1
    # The source format defaults to CSV, so the line below is optional.
@@ -1634,6 +1673,7 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys):
    # [END bigquery_load_table_gcs_orc_truncate]

    # Shared code
    # [START bigquery_load_table_gcs_avro_truncate]
    # [START bigquery_load_table_gcs_csv_truncate]
    # [START bigquery_load_table_gcs_json_truncate]
    # [START bigquery_load_table_gcs_parquet_truncate]
@@ -1648,6 +1688,7 @@ def test_load_table_from_uri_truncate(client, to_delete, capsys):

    destination_table = client.get_table(table_ref)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_avro_truncate]
    # [END bigquery_load_table_gcs_csv_truncate]
    # [END bigquery_load_table_gcs_json_truncate]
    # [END bigquery_load_table_gcs_parquet_truncate]
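Once the avro_truncate region tags are extracted, the published sample assembles into roughly the following sketch. The shared lines hidden behind the collapsed hunks (starting the job and waiting on it) are assumed to match the other truncate samples, and 'my_dataset' and 'existing_table' are the placeholders from the snippet comments, so treat this as an approximation rather than the exact published text.

from google.cloud import bigquery

client = bigquery.Client()
table_ref = client.dataset('my_dataset').table('existing_table')

job_config = bigquery.LoadJobConfig()
# WRITE_TRUNCATE replaces the destination table's existing rows with the
# loaded data; the default for load jobs, WRITE_APPEND, adds to them instead.
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
job_config.source_format = bigquery.SourceFormat.AVRO

uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.avro'
load_job = client.load_table_from_uri(uri, table_ref, job_config=job_config)
load_job.result()  # Waits for table load to complete.

destination_table = client.get_table(table_ref)
print('Loaded {} rows.'.format(destination_table.num_rows))

Keeping the job setup and polling inside shared region tags is why only the source_format and uri lines differ between the per-format truncate samples.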