
test: add retries to flaky datalab-migration test #8435


Merged 6 commits on Nov 2, 2022

76 changes: 40 additions & 36 deletions bigquery/datalab-migration/samples_test.py
@@ -14,14 +14,14 @@

import time

from google.api_core.retry import Retry
import google.auth
import google.datalab
import IPython
from IPython.terminal import interactiveshell
from IPython.testing import tools
import pytest


# Get default project
_, PROJECT_ID = google.auth.default()
# Set Datalab project ID
@@ -270,46 +270,50 @@ def test_client_library_load_table_from_gcs_csv(to_delete):


def test_datalab_load_table_from_dataframe(to_delete):
    # [START bigquery_migration_datalab_load_table_from_dataframe]
    import google.datalab.bigquery as bq
    import pandas

    # Create the dataset
    dataset_id = 'import_sample'
    # [END bigquery_migration_datalab_load_table_from_dataframe]
    # Use a unique dataset ID to avoid collisions when running tests
    dataset_id = 'test_dataset_{}'.format(int(time.time() * 1000))
    to_delete.append(dataset_id)
    # [START bigquery_migration_datalab_load_table_from_dataframe]
    bq.Dataset(dataset_id).create()

    # Create the table and load the data
    dataframe = pandas.DataFrame([
        {'title': 'The Meaning of Life', 'release_year': 1983},
        {'title': 'Monty Python and the Holy Grail', 'release_year': 1975},
        {'title': 'Life of Brian', 'release_year': 1979},
        {
            'title': 'And Now for Something Completely Different',
            'release_year': 1971
        },
    ])
    schema = bq.Schema.from_data(dataframe)
    table = bq.Table(
        '{}.monty_python'.format(dataset_id)).create(schema=schema)
    table.insert(dataframe)  # Starts a streaming insert of the data
    # [END bigquery_migration_datalab_load_table_from_dataframe]
    # The Datalab library uses tabledata().insertAll() to load data from
    # pandas DataFrames to tables. Because it can take a long time for the
    # rows to be available in the table, this test does not assert on the
    # number of rows in the destination table after the job is run. If
    # errors are encountered during the insertion, this test will fail.
    # See https://cloud.google.com/bigquery/streaming-data-into-bigquery
""" Wrap test with retries to handle transient errors """
@Retry()
def datalab_load_table_from_dataframe(to_delete):
# [START bigquery_migration_datalab_load_table_from_dataframe]
import google.datalab.bigquery as bq
import pandas

# Create the dataset
dataset_id = 'import_sample'
# [END bigquery_migration_datalab_load_table_from_dataframe]
# Use unique dataset ID to avoid collisions when running tests
dataset_id = 'test_dataset_{}'.format(int(time.time() * 1000))
to_delete.append(dataset_id)
# [START bigquery_migration_datalab_load_table_from_dataframe]
bq.Dataset(dataset_id).create()

# Create the table and load the data
dataframe = pandas.DataFrame([
{'title': 'The Meaning of Life', 'release_year': 1983},
{'title': 'Monty Python and the Holy Grail', 'release_year': 1975},
{'title': 'Life of Brian', 'release_year': 1979},
{
'title': 'And Now for Something Completely Different',
'release_year': 1971
},
])
schema = bq.Schema.from_data(dataframe)
table = bq.Table(
'{}.monty_python'.format(dataset_id)).create(schema=schema)
table.insert(dataframe) # Starts steaming insert of data
# [END bigquery_migration_datalab_load_table_from_dataframe]
# The Datalab library uses tabledata().insertAll() to load data from
# pandas DataFrames to tables. Because it can take a long time for the rows
# to be available in the table, this test does not assert on the number of
# rows in the destination table after the job is run. If errors are
# encountered during the insertion, this test will fail.
# See https://cloud.google.com/bigquery/streaming-data-into-bigquery
datalab_load_table_from_dataframe(to_delete)
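
A bare @Retry() applies the library's default predicate and exponential backoff. When a flake has a known signature, the decorator can be narrowed so that unrelated errors still fail fast. A minimal sketch, assuming the transient failures surface as retryable API errors (the exception types and timing values below are illustrative, not part of this change):

    from google.api_core.exceptions import ServiceUnavailable, TooManyRequests
    from google.api_core.retry import Retry, if_exception_type

    # Retry only the assumed-transient errors, backing off 1s, 2s, 4s, ...,
    # capped at 30s per sleep, and giving up after 120s overall.
    narrow_retry = Retry(
        predicate=if_exception_type(ServiceUnavailable, TooManyRequests),
        initial=1.0,
        multiplier=2.0,
        maximum=30.0,
        deadline=120.0,
    )

    @narrow_retry
    def flaky_step():
        ...  # hypothetical body subject to transient failures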


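Both versions of the test rely on the to_delete fixture, which is defined elsewhere in samples_test.py and not shown in this diff. One plausible shape for it, assuming it collects dataset IDs during the test and drops them at teardown (the cleanup call is an assumption, not the repository's code):

    import pytest
    import google.datalab.bigquery as bq

    @pytest.fixture
    def to_delete():
        # Collect dataset IDs during the test; drop them at teardown.
        doomed = []
        yield doomed
        for dataset_id in doomed:
            bq.Dataset(dataset_id).delete(delete_contents=True)
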
def test_client_library_load_table_from_dataframe(to_delete):
    # [START bigquery_migration_client_library_load_table_from_dataframe]
    import pandas
    from google.cloud import bigquery

    client = bigquery.Client(location='US')

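The client-library version of the test is truncated here. For contrast with Datalab's streaming insert above, a minimal sketch of the load-job path in google-cloud-bigquery (the table ID is illustrative, not this diff's code):

    import pandas
    from google.cloud import bigquery

    client = bigquery.Client(location='US')

    dataframe = pandas.DataFrame([
        {'title': 'The Meaning of Life', 'release_year': 1983},
    ])
    # load_table_from_dataframe runs a load job rather than a streaming
    # insert, so the rows are queryable as soon as the job completes.
    job = client.load_table_from_dataframe(
        dataframe, 'my_dataset.monty_python')  # hypothetical table ID
    job.result()  # Wait for the load job to finish.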