TST: Fix gbq integration tests. gbq._Dataset.datasets() would not return full results

This PR resolves an issue where `gbq._Dataset.datasets()` would not return
all datasets under a Google BigQuery project.

If `'nextPageToken'` is populated in the response, another `datasets().list()`
request should be sent with `'pageToken'` set to that token to collect the
remaining results.
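
A minimal sketch of that pagination loop, assuming a google-api-python-client
BigQuery `service` object of the kind gbq.py builds; the helper name and
arguments are illustrative only:

    def list_all_datasets(service, project_id):
        """Collect dataset ids across every page of datasets().list()."""
        dataset_ids = []
        page_token = None
        while True:
            response = service.datasets().list(
                projectId=project_id, pageToken=page_token).execute()
            for dataset in response.get('datasets', []):
                dataset_ids.append(dataset['datasetReference']['datasetId'])
            page_token = response.get('nextPageToken')
            if not page_token:
                # No more pages; every dataset has been collected.
                return dataset_ids
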
In the past few days, additional datasets were added under the Google
BigQuery project id used by pandas as part of the pandas-gbq GitHub project
(https://github.com/pydata/pandas-gbq).

The newly added datasets caused many gbq unit tests to fail because
`clean_gbq_environment()` checks whether a dataset exists, using the
incomplete results from `gbq._Dataset.datasets()`, before attempting to
delete it.
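
A rough sketch of that failure mode (illustrative only; the names below are
placeholders, not the exact pandas test helper):

    def cleanup_dataset(dataset, dataset_id):
        # `dataset.exists()` is assumed to check membership in the listing
        # returned by `dataset.datasets()`. With a truncated listing, an
        # existing dataset can appear to be absent...
        if dataset.exists(dataset_id):
            dataset.delete(dataset_id)
        # ...so the stale dataset is never deleted and the test environment
        # is not actually clean.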

Author: Anthonios Partheniou <apartheniou@electricalengineer.ca>

Closes pandas-dev#15381 from parthea/fix-broken-gbq-unit-tests and squashes the following commits:

61bc1e7 [Anthonios Partheniou] TST: Fix gbq tests. gbq.dataset()/gbq.tables would not return full results.
parthea authored and jreback committed Feb 14, 2017
1 parent d9e75c7 commit 86ca84d
Showing 2 changed files with 52 additions and 31 deletions.
67 changes: 44 additions & 23 deletions pandas/io/gbq.py
@@ -1056,21 +1056,32 @@ def datasets(self):
         List of datasets under the specific project
         """
 
-        try:
-            list_dataset_response = self.service.datasets().list(
-                projectId=self.project_id).execute().get('datasets', None)
-
-            if not list_dataset_response:
-                return []
-
-            dataset_list = list()
-
-            for row_num, raw_row in enumerate(list_dataset_response):
-                dataset_list.append(raw_row['datasetReference']['datasetId'])
-
-            return dataset_list
-        except self.http_error as ex:
-            self.process_http_error(ex)
+        dataset_list = []
+        next_page_token = None
+        first_query = True
+
+        while first_query or next_page_token:
+            first_query = False
+
+            try:
+                list_dataset_response = self.service.datasets().list(
+                    projectId=self.project_id,
+                    pageToken=next_page_token).execute()
+
+                dataset_response = list_dataset_response.get('datasets')
+                next_page_token = list_dataset_response.get('nextPageToken')
+
+                if not dataset_response:
+                    return dataset_list
+
+                for row_num, raw_row in enumerate(dataset_response):
+                    dataset_list.append(
+                        raw_row['datasetReference']['datasetId'])
+
+            except self.http_error as ex:
+                self.process_http_error(ex)
+
+        return dataset_list
 
     def create(self, dataset_id):
         """ Create a dataset in Google BigQuery
@@ -1140,19 +1151,29 @@ def tables(self, dataset_id):
         List of tables under the specific dataset
         """
 
-        try:
-            list_table_response = self.service.tables().list(
-                projectId=self.project_id,
-                datasetId=dataset_id).execute().get('tables', None)
-
-            if not list_table_response:
-                return []
-
-            table_list = list()
-
-            for row_num, raw_row in enumerate(list_table_response):
-                table_list.append(raw_row['tableReference']['tableId'])
-
-            return table_list
-        except self.http_error as ex:
-            self.process_http_error(ex)
+        table_list = []
+        next_page_token = None
+        first_query = True
+
+        while first_query or next_page_token:
+            first_query = False
+
+            try:
+                list_table_response = self.service.tables().list(
+                    projectId=self.project_id,
+                    datasetId=dataset_id,
+                    pageToken=next_page_token).execute()
+
+                table_response = list_table_response.get('tables')
+                next_page_token = list_table_response.get('nextPageToken')
+
+                if not table_response:
+                    return table_list
+
+                for row_num, raw_row in enumerate(table_response):
+                    table_list.append(raw_row['tableReference']['tableId'])
+
+            except self.http_error as ex:
+                self.process_http_error(ex)
+
+        return table_list
16 changes: 8 additions & 8 deletions pandas/tests/io/test_gbq.py
@@ -253,7 +253,7 @@ def test_generate_bq_schema_deprecated():
         gbq.generate_bq_schema(df)
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase):
 
     def setUp(self):
@@ -299,7 +299,7 @@ def test_get_application_default_credentials_returns_credentials(self):
         self.assertTrue(isinstance(credentials, GoogleCredentials))
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase):
     def setUp(self):
         _setup_common()
@@ -331,7 +331,7 @@ def test_should_be_able_to_get_results_from_query(self):
         self.assertTrue(pages is not None)
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase):
     def setUp(self):
         _setup_common()
@@ -449,7 +449,7 @@ def test_read_gbq_with_corrupted_private_key_json_should_fail(self):
             private_key=re.sub('[a-z]', '9', _get_private_key_contents()))
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestReadGBQIntegration(tm.TestCase):
 
     @classmethod
@@ -503,7 +503,7 @@ def test_should_read_as_service_account_with_key_contents(self):
         tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']}))
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase):
 
     @classmethod
@@ -906,7 +906,7 @@ def test_configuration_without_query(self):
            configuration=config)
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase):
     # Changes to BigQuery table schema may take up to 2 minutes as of May 2015
     # As a workaround to this issue, each test should use a unique table name.
@@ -1219,7 +1219,7 @@ def test_dataset_does_not_exist(self):
            DATASET_ID + "_not_found"), 'Expected dataset not to exist')
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase):
     # Changes to BigQuery table schema may take up to 2 minutes as of May 2015
     # As a workaround to this issue, each test should use a unique table name.
@@ -1277,7 +1277,7 @@ def test_upload_data(self):
         self.assertEqual(result['num_rows'][0], test_size)
 
 
-@pytest.mark.xfail(run=False, reason="flaky tests")
+@pytest.mark.single
 class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase):
     # Changes to BigQuery table schema may take up to 2 minutes as of May 2015
     # As a workaround to this issue, each test should use a unique table name.
