diff --git a/docs/inline_requests.rst b/docs/inline_requests.rst index 21f3359..1156ea8 100644 --- a/docs/inline_requests.rst +++ b/docs/inline_requests.rst @@ -4,10 +4,18 @@ inline_requests package Submodules ---------- -inline_requests.decorator module +inline_requests.generator module -------------------------------- -.. automodule:: inline_requests.decorator +.. automodule:: inline_requests.generator + :members: + :undoc-members: + :show-inheritance: + +inline_requests.utils module +---------------------------- + +.. automodule:: inline_requests.utils :members: :undoc-members: :show-inheritance: diff --git a/src/inline_requests/__init__.py b/src/inline_requests/__init__.py index 6167095..eb6a37c 100644 --- a/src/inline_requests/__init__.py +++ b/src/inline_requests/__init__.py @@ -1,7 +1,69 @@ # -*- coding: utf-8 -*- -from .decorator import inline_requests +from functools import wraps +from six import create_bound_method + +from .generator import RequestGenerator +from .utils import get_args __author__ = 'Rolando Espinoza' __email__ = 'rolando at rmax.io' __version__ = '0.3.0dev' + +__all__ = ['inline_requests'] + + +def inline_requests(method_or_func): + """A decorator to use coroutine-like spider callbacks. + + Example: + + .. code:: python + + class MySpider(Spider): + + @inline_requests + def parse(self, response): + next_url = response.urljoin('?next') + try: + next_resp = yield Request(next_url) + except Exception as e: + self.logger.exception("An error occurred.") + return + else: + yield {"next_url": next_resp.url} + + + You must conform with the following conventions: + + * The decorated method must be a spider method. + * The decorated method must use the ``yield`` keyword or return a + generator. + * The decorated method must accept ``response`` as the first argument. + * The decorated method must yield ``Request`` objects with neither + ``callback`` nor ``errback`` set. 
+ + If your requests don't come back to the generator try setting the flag to + handle all http statuses: + + .. code:: python + + request.meta['handle_httpstatus_all'] = True + + """ + args = get_args(method_or_func) + if not args: + raise TypeError("Function must accept at least one argument.") + # XXX: hardcoded convention of 'self' as first argument for methods + if args[0] == 'self': + def wrapper(self, response, **kwargs): + callback = create_bound_method(method_or_func, self) + + genwrapper = RequestGenerator(callback, **kwargs) + return genwrapper(response) + else: + def wrapper(response, **kwargs): + genwrapper = RequestGenerator(method_or_func, **kwargs) + return genwrapper(response) + + return wraps(method_or_func)(wrapper) diff --git a/src/inline_requests/decorator.py b/src/inline_requests/generator.py similarity index 52% rename from src/inline_requests/decorator.py rename to src/inline_requests/generator.py index a3a4d5e..df94dd3 100644 --- a/src/inline_requests/decorator.py +++ b/src/inline_requests/generator.py @@ -1,83 +1,14 @@ -import inspect -import types - -from functools import partial, wraps -from six import create_bound_method +from functools import partial +from types import GeneratorType from scrapy.http import Request from scrapy.utils.spider import iterate_spider_output -def _get_args(method_or_func): - """ - Return method or function arguments. +class RequestGenerator(object): + """This is the core class that wraps the callback and outputs the requests + one by one. """ - try: - # Python 3.0+ - args = list(inspect.signature(method_or_func).parameters.keys()) - except AttributeError: - # Python 2.7 - args = inspect.getargspec(method_or_func).args - return args - - -def inline_requests(method_or_func): - """A decorator to use coroutine-like spider callbacks. - - Example: - - .. 
code:: python - - class MySpider(Spider): - - @inline_callbacks - def parse(self, response): - next_url = response.urjoin('?next') - try: - next_resp = yield Request(next_url) - except Exception as e: - self.logger.exception("An error occurred.") - return - else: - yield {"next_url": next_resp.url} - - - You must conform with the following conventions: - - * The decorated method must be a spider method. - * The decorated method must use the ``yield`` keyword or return a - generator. - * The decorated method must accept ``response`` as the first argument. - * The decorated method must yield ``Request`` objects without neither - ``callback`` nor ``errback`` set. - - If your requests don't come back to the generator try setting the flag to - handle all http statuses: - - .. code:: python - - request.meta['handle_httpstatus_all'] = True - - """ - args = _get_args(method_or_func) - if not args: - raise TypeError("Function must accept at least one argument.") - # XXX: hardcoded convention of 'self' as first argument for methods - if args[0] == 'self': - def wrapper(self, response, **kwargs): - callback = create_bound_method(method_or_func, self) - - genwrapper = _RequestGenerator(callback, **kwargs) - return genwrapper(response) - else: - def wrapper(response, **kwargs): - genwrapper = _RequestGenerator(method_or_func, **kwargs) - return genwrapper(response) - - return wraps(method_or_func)(wrapper) - - -class _RequestGenerator(object): def __init__(self, callback, **kwargs): self.callback = callback @@ -85,7 +16,7 @@ def __init__(self, callback, **kwargs): def __call__(self, response): output = iterate_spider_output(self.callback(response=response, **self.kwargs)) - if not isinstance(output, types.GeneratorType): + if not isinstance(output, GeneratorType): raise ValueError("Callback must return a generator type") return self._unwindGenerator(output) diff --git a/src/inline_requests/utils.py b/src/inline_requests/utils.py new file mode 100644 index 0000000..aa471cd --- 
/dev/null +++ b/src/inline_requests/utils.py @@ -0,0 +1,12 @@ +import inspect + + +def get_args(method_or_func): + """Returns method or function arguments.""" + try: + # Python 3.0+ + args = list(inspect.signature(method_or_func).parameters.keys()) + except AttributeError: + # Python 2.7 + args = inspect.getargspec(method_or_func).args + return args