Skip to content

Commit ba13299

Browse files
authored
Merge pull request googleapis#2531 from dhermes/revamp-iterator
Re-architect Iterator class.
2 parents f024851 + 614a3c3 commit ba13299

File tree

11 files changed

+577
-437
lines changed

11 files changed

+577
-437
lines changed

core/google/cloud/iterator.py

Lines changed: 154 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,21 @@
1717
These iterators simplify the process of paging through API responses
1818
where the response is a list of results with a ``nextPageToken``.
1919
20-
To make an iterator work, just override the ``get_items_from_response``
21-
method so that given a response (containing a page of results) it parses
22-
those results into an iterable of the actual objects you want::
20+
To make an iterator work, just override the ``PAGE_CLASS`` class
21+
attribute so that a given response (containing a page of results) can
22+
be parsed into an iterable page of the actual objects you want::
23+
24+
class MyPage(Page):
25+
26+
def _item_to_value(self, item):
27+
my_item = MyItemClass(other_arg=True)
28+
my_item._set_properties(item)
29+
return my_item
30+
2331
2432
class MyIterator(Iterator):
25-
def get_items_from_response(self, response):
26-
items = response.get('items', [])
27-
for item in items:
28-
my_item = MyItemClass(other_arg=True)
29-
my_item._set_properties(item)
30-
yield my_item
33+
34+
PAGE_CLASS = MyPage
3135
3236
You can then use this to get **all** the results from a resource::
3337
@@ -38,25 +42,114 @@ def get_items_from_response(self, response):
3842
you find what you're looking for (resulting in possibly fewer
3943
requests)::
4044
41-
>>> for item in MyIterator(...):
42-
>>> print(item.name)
43-
>>> if not item.is_valid:
44-
>>> break
45+
>>> for my_item in MyIterator(...):
46+
... print(my_item.name)
47+
... if not my_item.is_valid:
48+
... break
49+
50+
When iterating, not every new item will send a request to the server.
51+
To monitor these requests, track the current page of the iterator::
52+
53+
>>> iterator = MyIterator(...)
54+
>>> iterator.page_number
55+
0
56+
>>> next(iterator)
57+
<MyItemClass at 0x7f1d3cccf690>
58+
>>> iterator.page_number
59+
1
60+
>>> iterator.page.remaining
61+
1
62+
>>> next(iterator)
63+
<MyItemClass at 0x7f1d3cccfe90>
64+
>>> iterator.page.remaining
65+
0
66+
>>> next(iterator)
67+
<MyItemClass at 0x7f1d3cccffd0>
68+
>>> iterator.page_number
69+
2
70+
>>> iterator.page.remaining
71+
19
4572
"""
4673

4774

4875
import six
4976

5077

78+
class Page(object):
79+
"""Single page of results in an iterator.
80+
81+
:type parent: :class:`Iterator`
82+
:param parent: The iterator that owns the current page.
83+
84+
:type response: dict
85+
:param response: The JSON API response for a page.
86+
"""
87+
88+
ITEMS_KEY = 'items'
89+
90+
def __init__(self, parent, response):
91+
self._parent = parent
92+
items = response.get(self.ITEMS_KEY, ())
93+
self._num_items = len(items)
94+
self._remaining = self._num_items
95+
self._item_iter = iter(items)
96+
97+
@property
98+
def num_items(self):
99+
"""Total items in the page.
100+
101+
:rtype: int
102+
:returns: The number of items in this page of items.
103+
"""
104+
return self._num_items
105+
106+
@property
107+
def remaining(self):
108+
"""Remaining items in the page.
109+
110+
:rtype: int
111+
:returns: The number of items remaining in this page.
112+
"""
113+
return self._remaining
114+
115+
def __iter__(self):
116+
"""The :class:`Page` is an iterator."""
117+
return self
118+
119+
def _item_to_value(self, item):
120+
"""Get the next item in the page.
121+
122+
This method (along with the constructor) is the workhorse
123+
of this class. Subclasses will need to implement this method.
124+
125+
:type item: dict
126+
:param item: An item to be converted to a native object.
127+
128+
:raises NotImplementedError: Always
129+
"""
130+
raise NotImplementedError
131+
132+
def next(self):
133+
"""Get the next value in the iterator."""
134+
item = six.next(self._item_iter)
135+
result = self._item_to_value(item)
136+
# Since we've successfully got the next value from the
137+
# iterator, we update the number of remaining.
138+
self._remaining -= 1
139+
return result
140+
141+
# Alias needed for Python 2/3 support.
142+
__next__ = next
143+
144+
51145
class Iterator(object):
52146
"""A generic class for iterating through Cloud JSON APIs list responses.
53147
148+
Subclasses need to override ``PAGE_CLASS``.
149+
54150
:type client: :class:`google.cloud.client.Client`
55151
:param client: The client, which owns a connection to make requests.
56152
57-
:type path: str
58-
:param path: The path to query for the list of items.
59-
60153
:type page_token: str
61154
:param page_token: (Optional) A token identifying a page in a result set.
62155
@@ -65,59 +158,74 @@ class Iterator(object):
65158
66159
:type extra_params: dict or None
67160
:param extra_params: Extra query string parameters for the API call.
161+
162+
:type path: str
163+
:param path: The path to query for the list of items.
68164
"""
69165

70166
PAGE_TOKEN = 'pageToken'
71167
MAX_RESULTS = 'maxResults'
72168
RESERVED_PARAMS = frozenset([PAGE_TOKEN, MAX_RESULTS])
169+
PAGE_CLASS = Page
170+
PATH = None
73171

74-
def __init__(self, client, path, page_token=None,
75-
max_results=None, extra_params=None):
172+
def __init__(self, client, page_token=None, max_results=None,
173+
extra_params=None, path=None):
174+
self.extra_params = extra_params or {}
175+
self._verify_params()
176+
self.max_results = max_results
76177
self.client = client
77-
self.path = path
178+
self.path = path or self.PATH
179+
# The attributes below will change over the life of the iterator.
78180
self.page_number = 0
79181
self.next_page_token = page_token
80-
self.max_results = max_results
81182
self.num_results = 0
82-
self.extra_params = extra_params or {}
183+
self._page = None
184+
185+
def _verify_params(self):
186+
"""Verifies the parameters don't use any reserved parameter.
187+
188+
:raises ValueError: If a reserved parameter is used.
189+
"""
83190
reserved_in_use = self.RESERVED_PARAMS.intersection(
84191
self.extra_params)
85192
if reserved_in_use:
86-
raise ValueError(('Using a reserved parameter',
87-
reserved_in_use))
88-
self._curr_items = iter(())
193+
raise ValueError('Using a reserved parameter',
194+
reserved_in_use)
195+
196+
@property
197+
def page(self):
198+
"""The current page of results that has been retrieved.
199+
200+
:rtype: :class:`Page`
201+
:returns: The page of items that has been retrieved.
202+
"""
203+
return self._page
89204

90205
def __iter__(self):
91206
"""The :class:`Iterator` is an iterator."""
92207
return self
93208

94-
def _update_items(self):
95-
"""Replace the current items iterator.
96-
97-
Intended to be used when the current items iterator is exhausted.
209+
def _update_page(self):
210+
"""Replace the current page.
98211
99-
After replacing the iterator, consumes the first value to make sure
100-
it is valid.
212+
Does nothing if the current page is non-null and has items
213+
remaining.
101214
102-
:rtype: object
103-
:returns: The first item in the next iterator.
104215
:raises: :class:`~exceptions.StopIteration` if there is no next page.
105216
"""
217+
if self.page is not None and self.page.remaining > 0:
218+
return
106219
if self.has_next_page():
107-
response = self.get_next_page_response()
108-
items = self.get_items_from_response(response)
109-
self._curr_items = iter(items)
110-
return six.next(self._curr_items)
220+
response = self._get_next_page_response()
221+
self._page = self.PAGE_CLASS(self, response)
111222
else:
112223
raise StopIteration
113224

114225
def next(self):
115226
"""Get the next value in the iterator."""
116-
try:
117-
item = six.next(self._curr_items)
118-
except StopIteration:
119-
item = self._update_items()
120-
227+
self._update_page()
228+
item = six.next(self.page)
121229
self.num_results += 1
122230
return item
123231

@@ -139,7 +247,7 @@ def has_next_page(self):
139247

140248
return self.next_page_token is not None
141249

142-
def get_query_params(self):
250+
def _get_query_params(self):
143251
"""Getter for query parameters for the next request.
144252
145253
:rtype: dict
@@ -153,17 +261,15 @@ def get_query_params(self):
153261
result.update(self.extra_params)
154262
return result
155263

156-
def get_next_page_response(self):
264+
def _get_next_page_response(self):
157265
"""Requests the next page from the path provided.
158266
159267
:rtype: dict
160268
:returns: The parsed JSON response of the next page's contents.
161269
"""
162-
if not self.has_next_page():
163-
raise RuntimeError('No more pages. Try resetting the iterator.')
164-
165270
response = self.client.connection.api_request(
166-
method='GET', path=self.path, query_params=self.get_query_params())
271+
method='GET', path=self.path,
272+
query_params=self._get_query_params())
167273

168274
self.page_number += 1
169275
self.next_page_token = response.get('nextPageToken')
@@ -175,62 +281,4 @@ def reset(self):
175281
self.page_number = 0
176282
self.next_page_token = None
177283
self.num_results = 0
178-
179-
def get_items_from_response(self, response):
180-
"""Factory method called while iterating. This should be overridden.
181-
182-
This method should be overridden by a subclass. It should
183-
accept the API response of a request for the next page of items,
184-
and return a list (or other iterable) of items.
185-
186-
Typically this method will construct a Bucket or a Blob from the
187-
page of results in the response.
188-
189-
:type response: dict
190-
:param response: The response of asking for the next page of items.
191-
"""
192-
raise NotImplementedError
193-
194-
195-
class MethodIterator(object):
196-
"""Method-based iterator iterating through Cloud JSON APIs list responses.
197-
198-
:type method: instance method
199-
:param method: ``list_foo`` method of a domain object, taking as arguments
200-
``page_token``, ``page_size``, and optional additional
201-
keyword arguments.
202-
203-
:type page_token: string or ``NoneType``
204-
:param page_token: Initial page token to pass. if ``None``, fetch the
205-
first page from the ``method`` API call.
206-
207-
:type page_size: integer or ``NoneType``
208-
:param page_size: Maximum number of items to return from the ``method``
209-
API call; if ``None``, uses the default for the API.
210-
211-
:type max_calls: integer or ``NoneType``
212-
:param max_calls: Maximum number of times to make the ``method``
213-
API call; if ``None``, applies no limit.
214-
215-
:type kw: dict
216-
:param kw: optional keyword arguments to be passed to ``method``.
217-
"""
218-
def __init__(self, method, page_token=None, page_size=None,
219-
max_calls=None, **kw):
220-
self._method = method
221-
self._token = page_token
222-
self._page_size = page_size
223-
self._kw = kw
224-
self._max_calls = max_calls
225-
self._page_num = 0
226-
227-
def __iter__(self):
228-
while self._max_calls is None or self._page_num < self._max_calls:
229-
items, new_token = self._method(
230-
page_token=self._token, page_size=self._page_size, **self._kw)
231-
for item in items:
232-
yield item
233-
if new_token is None:
234-
return
235-
self._page_num += 1
236-
self._token = new_token
284+
self._page = None

0 commit comments

Comments
 (0)