Skip to content

Commit b402216

Browse files
authored
Merge pull request #2561 from dhermes/bigquery-iterators-alt
Updating Client.list_* methods in BigQuery to use Iterators.
2 parents e575f81 + f8ab149 commit b402216

File tree

4 files changed

+106
-75
lines changed

4 files changed

+106
-75
lines changed

bigquery/google/cloud/bigquery/client.py

Lines changed: 72 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from google.cloud.bigquery.job import LoadTableFromStorageJob
2424
from google.cloud.bigquery.job import QueryJob
2525
from google.cloud.bigquery.query import QueryResults
26+
from google.cloud.iterator import Iterator
2627

2728

2829
class Project(object):
@@ -87,26 +88,13 @@ def list_projects(self, max_results=None, page_token=None):
8788
not passed, the API will return the first page of
8889
projects.
8990
90-
:rtype: tuple, (list, str)
91-
:returns: list of :class:`~google.cloud.bigquery.client.Project`,
92-
plus a "next page token" string: if the token is not None,
93-
indicates that more projects can be retrieved with another
94-
call (pass that value as ``page_token``).
91+
:rtype: :class:`~google.cloud.iterator.Iterator`
92+
:returns: Iterator of :class:`~google.cloud.bigquery.client.Project`
93+
accessible to the current client.
9594
"""
96-
params = {}
97-
98-
if max_results is not None:
99-
params['maxResults'] = max_results
100-
101-
if page_token is not None:
102-
params['pageToken'] = page_token
103-
104-
path = '/projects'
105-
resp = self.connection.api_request(method='GET', path=path,
106-
query_params=params)
107-
projects = [Project.from_api_repr(resource)
108-
for resource in resp.get('projects', ())]
109-
return projects, resp.get('nextPageToken')
95+
return Iterator(client=self, path='/projects',
96+
items_key='projects', item_to_value=_item_to_project,
97+
page_token=page_token, max_results=max_results)
11098

11199
def list_datasets(self, include_all=False, max_results=None,
112100
page_token=None):
@@ -127,29 +115,18 @@ def list_datasets(self, include_all=False, max_results=None,
127115
not passed, the API will return the first page of
128116
datasets.
129117
130-
:rtype: tuple, (list, str)
131-
:returns: list of :class:`~google.cloud.bigquery.dataset.Dataset`,
132-
plus a "next page token" string: if the token is not None,
133-
indicates that more datasets can be retrieved with another
134-
call (pass that value as ``page_token``).
118+
:rtype: :class:`~google.cloud.iterator.Iterator`
119+
:returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`
120+
accessible to the current client.
135121
"""
136-
params = {}
137-
122+
extra_params = {}
138123
if include_all:
139-
params['all'] = True
140-
141-
if max_results is not None:
142-
params['maxResults'] = max_results
143-
144-
if page_token is not None:
145-
params['pageToken'] = page_token
146-
124+
extra_params['all'] = True
147125
path = '/projects/%s/datasets' % (self.project,)
148-
resp = self.connection.api_request(method='GET', path=path,
149-
query_params=params)
150-
datasets = [Dataset.from_api_repr(resource, self)
151-
for resource in resp.get('datasets', ())]
152-
return datasets, resp.get('nextPageToken')
126+
return Iterator(
127+
client=self, path=path, items_key='datasets',
128+
item_to_value=_item_to_dataset, page_token=page_token,
129+
max_results=max_results, extra_params=extra_params)
153130

154131
def dataset(self, dataset_name):
155132
"""Construct a dataset bound to this client.
@@ -215,32 +192,22 @@ def list_jobs(self, max_results=None, page_token=None, all_users=None,
215192
* ``"pending"``
216193
* ``"running"``
217194
218-
:rtype: tuple, (list, str)
219-
:returns: list of job instances, plus a "next page token" string:
220-
if the token is not ``None``, indicates that more jobs can be
221-
retrieved with another call, passing that value as
222-
``page_token``).
195+
:rtype: :class:`~google.cloud.iterator.Iterator`
196+
:returns: Iterator of job instances.
223197
"""
224-
params = {'projection': 'full'}
225-
226-
if max_results is not None:
227-
params['maxResults'] = max_results
228-
229-
if page_token is not None:
230-
params['pageToken'] = page_token
198+
extra_params = {'projection': 'full'}
231199

232200
if all_users is not None:
233-
params['allUsers'] = all_users
201+
extra_params['allUsers'] = all_users
234202

235203
if state_filter is not None:
236-
params['stateFilter'] = state_filter
204+
extra_params['stateFilter'] = state_filter
237205

238206
path = '/projects/%s/jobs' % (self.project,)
239-
resp = self.connection.api_request(method='GET', path=path,
240-
query_params=params)
241-
jobs = [self.job_from_resource(resource)
242-
for resource in resp.get('jobs', ())]
243-
return jobs, resp.get('nextPageToken')
207+
return Iterator(
208+
client=self, path=path, items_key='jobs',
209+
item_to_value=_item_to_job, page_token=page_token,
210+
max_results=max_results, extra_params=extra_params)
244211

245212
def load_table_from_storage(self, job_name, destination, *source_uris):
246213
"""Construct a job for loading data into a table from CloudStorage.
@@ -334,3 +301,50 @@ def run_sync_query(self, query):
334301
:returns: a new ``QueryResults`` instance
335302
"""
336303
return QueryResults(query, client=self)
304+
305+
306+
# pylint: disable=unused-argument
307+
def _item_to_project(iterator, resource):
308+
"""Convert a JSON project to the native object.
309+
310+
:type iterator: :class:`~google.cloud.iterator.Iterator`
311+
:param iterator: The iterator that is currently in use.
312+
313+
:type resource: dict
314+
:param resource: An item to be converted to a project.
315+
316+
:rtype: :class:`.Project`
317+
:returns: The next project in the page.
318+
"""
319+
return Project.from_api_repr(resource)
320+
# pylint: enable=unused-argument
321+
322+
323+
def _item_to_dataset(iterator, resource):
324+
"""Convert a JSON dataset to the native object.
325+
326+
:type iterator: :class:`~google.cloud.iterator.Iterator`
327+
:param iterator: The iterator that is currently in use.
328+
329+
:type resource: dict
330+
:param resource: An item to be converted to a dataset.
331+
332+
:rtype: :class:`.Dataset`
333+
:returns: The next dataset in the page.
334+
"""
335+
return Dataset.from_api_repr(resource, iterator.client)
336+
337+
338+
def _item_to_job(iterator, resource):
339+
"""Convert a JSON job to the native object.
340+
341+
:type iterator: :class:`~google.cloud.iterator.Iterator`
342+
:param iterator: The iterator that is currently in use.
343+
344+
:type resource: dict
345+
:param resource: An item to be converted to a job.
346+
347+
:rtype: job instance.
348+
:returns: The next job in the page.
349+
"""
350+
return iterator.client.job_from_resource(resource)

bigquery/unit_tests/test_client.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,10 @@ def test_list_projects_defaults(self):
5959
client = self._makeOne(PROJECT_1, creds)
6060
conn = client.connection = _Connection(DATA)
6161

62-
projects, token = client.list_projects()
62+
iterator = client.list_projects()
63+
iterator.update_page()
64+
projects = list(iterator.page)
65+
token = iterator.next_page_token
6366

6467
self.assertEqual(len(projects), len(DATA['projects']))
6568
for found, expected in zip(projects, DATA['projects']):
@@ -83,7 +86,10 @@ def test_list_projects_explicit_response_missing_projects_key(self):
8386
client = self._makeOne(PROJECT, creds)
8487
conn = client.connection = _Connection(DATA)
8588

86-
projects, token = client.list_projects(max_results=3, page_token=TOKEN)
89+
iterator = client.list_projects(max_results=3, page_token=TOKEN)
90+
iterator.update_page()
91+
projects = list(iterator.page)
92+
token = iterator.next_page_token
8793

8894
self.assertEqual(len(projects), 0)
8995
self.assertIsNone(token)
@@ -121,7 +127,10 @@ def test_list_datasets_defaults(self):
121127
client = self._makeOne(PROJECT, creds)
122128
conn = client.connection = _Connection(DATA)
123129

124-
datasets, token = client.list_datasets()
130+
iterator = client.list_datasets()
131+
iterator.update_page()
132+
datasets = list(iterator.page)
133+
token = iterator.next_page_token
125134

126135
self.assertEqual(len(datasets), len(DATA['datasets']))
127136
for found, expected in zip(datasets, DATA['datasets']):
@@ -144,8 +153,11 @@ def test_list_datasets_explicit_response_missing_datasets_key(self):
144153
client = self._makeOne(PROJECT, creds)
145154
conn = client.connection = _Connection(DATA)
146155

147-
datasets, token = client.list_datasets(
156+
iterator = client.list_datasets(
148157
include_all=True, max_results=3, page_token=TOKEN)
158+
iterator.update_page()
159+
datasets = list(iterator.page)
160+
token = iterator.next_page_token
149161

150162
self.assertEqual(len(datasets), 0)
151163
self.assertIsNone(token)
@@ -288,7 +300,10 @@ def test_list_jobs_defaults(self):
288300
client = self._makeOne(PROJECT, creds)
289301
conn = client.connection = _Connection(DATA)
290302

291-
jobs, token = client.list_jobs()
303+
iterator = client.list_jobs()
304+
iterator.update_page()
305+
jobs = list(iterator.page)
306+
token = iterator.next_page_token
292307

293308
self.assertEqual(len(jobs), len(DATA['jobs']))
294309
for found, expected in zip(jobs, DATA['jobs']):
@@ -340,7 +355,10 @@ def test_list_jobs_load_job_wo_sourceUris(self):
340355
client = self._makeOne(PROJECT, creds)
341356
conn = client.connection = _Connection(DATA)
342357

343-
jobs, token = client.list_jobs()
358+
iterator = client.list_jobs()
359+
iterator.update_page()
360+
jobs = list(iterator.page)
361+
token = iterator.next_page_token
344362

345363
self.assertEqual(len(jobs), len(DATA['jobs']))
346364
for found, expected in zip(jobs, DATA['jobs']):
@@ -364,8 +382,11 @@ def test_list_jobs_explicit_missing(self):
364382
client = self._makeOne(PROJECT, creds)
365383
conn = client.connection = _Connection(DATA)
366384

367-
jobs, token = client.list_jobs(max_results=1000, page_token=TOKEN,
368-
all_users=True, state_filter='done')
385+
iterator = client.list_jobs(max_results=1000, page_token=TOKEN,
386+
all_users=True, state_filter='done')
387+
iterator.update_page()
388+
jobs = list(iterator.page)
389+
token = iterator.next_page_token
369390

370391
self.assertEqual(len(jobs), 0)
371392
self.assertIsNone(token)

docs/bigquery_snippets.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,8 @@ def do_something_with(_):
7575
pass
7676

7777
# [START client_list_datasets]
78-
datasets, token = client.list_datasets() # API request
79-
while True:
80-
for dataset in datasets:
81-
do_something_with(dataset)
82-
if token is None:
83-
break
84-
datasets, token = client.list_datasets(page_token=token) # API request
78+
for dataset in client.list_datasets(): # API request(s)
79+
do_something_with(dataset)
8580
# [END client_list_datasets]
8681

8782

system_tests/bigquery.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,9 @@ def test_list_datasets(self):
151151
self.to_delete.append(dataset)
152152

153153
# Retrieve the datasets.
154-
all_datasets, token = Config.CLIENT.list_datasets()
155-
self.assertIsNone(token)
154+
iterator = Config.CLIENT.list_datasets()
155+
all_datasets = list(iterator)
156+
self.assertIsNone(iterator.next_page_token)
156157
created = [dataset for dataset in all_datasets
157158
if dataset.name in datasets_to_create and
158159
dataset.project == Config.CLIENT.project]

0 commit comments

Comments
 (0)