googleapis · dhermes · Oct 18, 2016 · Oct 18, 2016 · Oct 18, 2016
diff --git a/core/google/cloud/iterator.py b/core/google/cloud/iterator.py
@@ -17,37 +17,35 @@
 These iterators simplify the process of paging through API responses
 where the response is a list of results with a ``nextPageToken``.
 
-To make an iterator work, you may need to override the
-``ITEMS_KEY`` class attribute so that a given response (containing a page of
-results) can be parsed into an iterable page of the actual objects you want::
-
-  class MyIterator(Iterator):
-
-      ITEMS_KEY = 'blocks'
-
-      def _item_to_value(self, item):
-          my_item = MyItemClass(other_arg=True)
-          my_item._set_properties(item)
-          return my_item
-
-You then can use this to get **all** the results from a resource::
-
-    >>> iterator = MyIterator(...)
+To make an iterator work, you'll need to provide a way to convert a JSON
+item returned from the API into the object of your choice (via
+``item_to_value``). You also may need to specify a custom ``items_key`` so
+that a given response (containing a page of results) can be parsed into an
+iterable page of the actual objects you want. You then can use this to get
+**all** the results from a resource::
+
+    >>> def item_to_value(iterator, item):
+    ...     my_item = MyItemClass(iterator.client, other_arg=True)
+    ...     my_item._set_properties(item)
+    ...     return my_item
+    ...
+    >>> iterator = Iterator(..., items_key='blocks',
+    ...                     item_to_value=item_to_value)
     >>> list(iterator)  # Convert to a list (consumes all values).
 
 Or you can walk your way through items and call off the search early if
 you find what you're looking for (resulting in possibly fewer
 requests)::
 
-    >>> for my_item in MyIterator(...):
+    >>> for my_item in Iterator(...):
     ...     print(my_item.name)
     ...     if not my_item.is_valid:
     ...         break
 
 When iterating, not every new item will send a request to the server.
 To monitor these requests, track the current page of the iterator::
 
-    >>> iterator = MyIterator(...)
+    >>> iterator = Iterator(...)
     >>> iterator.page_number
     0
     >>> next(iterator)
@@ -58,6 +56,8 @@ def _item_to_value(self, item):
     1
     >>> next(iterator)
     <MyItemClass at 0x7f1d3cccfe90>
+    >>> iterator.page_number
+    1
     >>> iterator.page.remaining
     0
     >>> next(iterator)
@@ -70,7 +70,7 @@ def _item_to_value(self, item):
 It's also possible to consume an entire page and handle the paging process
 manually::
 
-    >>> iterator = MyIterator(...)
+    >>> iterator = Iterator(...)
     >>> # Manually pull down the first page.
     >>> iterator.update_page()
     >>> items = list(iterator.page)
@@ -96,6 +96,8 @@ def _item_to_value(self, item):
     ]
     >>>
     >>> # When there are no more results
+    >>> iterator.next_page_token is None
+    True
     >>> iterator.update_page()
     >>> iterator.page is None
     True
@@ -113,6 +115,26 @@ def _item_to_value(self, item):
 _PAGE_ERR_TEMPLATE = (
     'Tried to update the page while current page (%r) still has %d '
     'items remaining.')
+DEFAULT_ITEMS_KEY = 'items'
+"""The dictionary key used to retrieve items from each response."""
+
+
+# pylint: disable=unused-argument
+def _do_nothing_page_start(iterator, page, response):
+    """Helper to provide custom behavior after a :class:`Page` is started.
+
+    This is a do-nothing stand-in used as the default ``page_start``.
+
+    :type iterator: :class:`Iterator`
+    :param iterator: An iterator that holds some request info.
+
+    :type page: :class:`Page`
+    :param page: The page that was just created.
+
+    :type response: dict
+    :param response: The JSON API response for a page.
+    """
+# pylint: enable=unused-argument
 
 
 class Page(object):
@@ -127,15 +149,21 @@ class Page(object):
     :type items_key: str
     :param items_key: The dictionary key used to retrieve items
                       from the response.
+
+    :type item_to_value: callable
+    :param item_to_value: Callable to convert an item from JSON
+                          into the native object. Assumed signature
+                          takes an :class:`Iterator` and a dictionary
+                          holding a single item.
     """
 
-    def __init__(self, parent, response, items_key):
+    def __init__(self, parent, response, items_key, item_to_value):
         self._parent = parent
         items = response.get(items_key, ())
         self._num_items = len(items)
         self._remaining = self._num_items
         self._item_iter = iter(items)
-        self.response = response
+        self._item_to_value = item_to_value
 
     @property
     def num_items(self):
@@ -162,7 +190,7 @@ def __iter__(self):
     def next(self):
         """Get the next value in the page."""
         item = six.next(self._item_iter)
-        result = self._parent._item_to_value(item)
+        result = self._item_to_value(self._parent, item)
         # Since we've successfully got the next value from the
         # iterator, we update the number of remaining.
         self._remaining -= 1
@@ -175,12 +203,23 @@ def next(self):
 class Iterator(object):
     """A generic class for iterating through Cloud JSON APIs list responses.
 
-    Sub-classes need to over-write :attr:`ITEMS_KEY` and to define
-    :meth:`_item_to_value`.
-
     :type client: :class:`~google.cloud.client.Client`
     :param client: The client, which owns a connection to make requests.
 
+    :type path: str
+    :param path: The path to query for the list of items. Required;
+                 there is no class-level default to fall back on.
+
+    :type item_to_value: callable
+    :param item_to_value: Callable to convert an item from JSON
+                          into the native object. Assumed signature
+                          takes an :class:`Iterator` and a dictionary
+                          holding a single item.
+
+    :type items_key: str
+    :param items_key: (Optional) The key used to grab retrieved items from an
+                      API response. Defaults to :data:`DEFAULT_ITEMS_KEY`.
+
     :type page_token: str
     :param page_token: (Optional) A token identifying a page in a result set.
 
@@ -191,26 +230,32 @@ class Iterator(object):
     :param extra_params: (Optional) Extra query string parameters for the
                          API call.
 
-    :type path: str
-    :param path: (Optional) The path to query for the list of items. Defaults
-                 to :attr:`PATH` on the current iterator class.
+    :type page_start: callable
+    :param page_start: (Optional) Callable to provide any special behavior
+                       after a new page has been created. Assumed signature
+                       takes the :class:`Iterator` that started the page,
+                       the :class:`Page` that was started and the dictionary
+                       containing the page response.
     """
 
-    PAGE_TOKEN = 'pageToken'
-    MAX_RESULTS = 'maxResults'
-    RESERVED_PARAMS = frozenset([PAGE_TOKEN, MAX_RESULTS])
-    PATH = None
-    ITEMS_KEY = 'items'
-    """The dictionary key used to retrieve items from each response."""
-    _PAGE_CLASS = Page
-
-    def __init__(self, client, page_token=None, max_results=None,
-                 extra_params=None, path=None):
-        self.extra_params = extra_params or {}
-        self._verify_params()
-        self.max_results = max_results
+    _PAGE_TOKEN = 'pageToken'
+    _MAX_RESULTS = 'maxResults'
+    _RESERVED_PARAMS = frozenset([_PAGE_TOKEN, _MAX_RESULTS])
+
+    def __init__(self, client, path, item_to_value,
+                 items_key=DEFAULT_ITEMS_KEY,
+                 page_token=None, max_results=None, extra_params=None,
+                 page_start=_do_nothing_page_start):
         self.client = client
-        self.path = path or self.PATH
+        self.path = path
+        self._items_key = items_key
+        self._item_to_value = item_to_value
+        self.max_results = max_results
+        self.extra_params = extra_params
+        self._page_start = page_start
+        if self.extra_params is None:
+            self.extra_params = {}
+        self._verify_params()
         # The attributes below will change over the life of the iterator.
         self.page_number = 0
         self.next_page_token = page_token
@@ -222,7 +267,7 @@ def _verify_params(self):
 
         :raises ValueError: If a reserved parameter is used.
         """
-        reserved_in_use = self.RESERVED_PARAMS.intersection(
+        reserved_in_use = self._RESERVED_PARAMS.intersection(
             self.extra_params)
         if reserved_in_use:
             raise ValueError('Using a reserved parameter',
@@ -275,26 +320,16 @@ def update_page(self, require_empty=True):
         if page_empty:
             if self._has_next_page():
                 response = self._get_next_page_response()
-                self._page = self._PAGE_CLASS(self, response, self.ITEMS_KEY)
+                self._page = Page(self, response, self._items_key,
+                                  self._item_to_value)
+                self._page_start(self, self._page, response)
             else:
                 self._page = None
         else:
             if require_empty:
                 msg = _PAGE_ERR_TEMPLATE % (self._page, self.page.remaining)
                 raise ValueError(msg)
 
-    def _item_to_value(self, item):
-        """Get the next item in the page.
-
-        Subclasses will need to implement this method.
-
-        :type item: dict
-        :param item: An item to be converted to a native object.
-
-        :raises NotImplementedError: Always
-        """
-        raise NotImplementedError
-
     def next(self):
         """Get the next item from the request."""
         self.update_page(require_empty=False)
@@ -330,9 +365,9 @@ def _get_query_params(self):
         """
         result = {}
         if self.next_page_token is not None:
-            result[self.PAGE_TOKEN] = self.next_page_token
+            result[self._PAGE_TOKEN] = self.next_page_token
         if self.max_results is not None:
-            result[self.MAX_RESULTS] = self.max_results - self.num_results
+            result[self._MAX_RESULTS] = self.max_results - self.num_results
         result.update(self.extra_params)
         return result