Skip to content

Commit

Permalink
Better code organization.
Browse files Browse the repository at this point in the history
  • Loading branch information
rmax committed Jun 24, 2016
1 parent c6ae03f commit 15fe8a9
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 78 deletions.
12 changes: 10 additions & 2 deletions docs/inline_requests.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@ inline_requests package
Submodules
----------

inline_requests.decorator module
inline_requests.generator module
--------------------------------

.. automodule:: inline_requests.decorator
.. automodule:: inline_requests.generator
:members:
:undoc-members:
:show-inheritance:

inline_requests.utils module
----------------------------

.. automodule:: inline_requests.utils
:members:
:undoc-members:
:show-inheritance:
Expand Down
64 changes: 63 additions & 1 deletion src/inline_requests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,69 @@
# -*- coding: utf-8 -*-
from .decorator import inline_requests
from functools import wraps
from six import create_bound_method

from .generator import RequestGenerator
from .utils import get_args


__author__ = 'Rolando Espinoza'
__email__ = 'rolando at rmax.io'
__version__ = '0.3.0dev'

__all__ = ['inline_requests']


def inline_requests(method_or_func):
    """A decorator to use coroutine-like spider callbacks.

    Example:

    .. code:: python

        class MySpider(Spider):

            @inline_requests
            def parse(self, response):
                next_url = response.urljoin('?next')
                try:
                    next_resp = yield Request(next_url)
                except Exception:
                    self.logger.exception("An error occurred.")
                    return
                else:
                    yield {"next_url": next_resp.url}

    You must conform with the following conventions:

    * The decorated method must be a spider method.
    * The decorated method must use the ``yield`` keyword or return a
      generator.
    * The decorated method must accept ``response`` as the first argument.
    * The decorated method must yield ``Request`` objects with neither
      ``callback`` nor ``errback`` set.

    If your requests don't come back to the generator try setting the flag to
    handle all http statuses:

    .. code:: python

        request.meta['handle_httpstatus_all'] = True

    :param method_or_func: spider method (or plain function) to wrap.
    :raises TypeError: if the decorated callable accepts no arguments.
    """
    args = get_args(method_or_func)
    if not args:
        raise TypeError("Function must accept at least one argument.")
    # XXX: hardcoded convention of 'self' as first argument for methods
    if args[0] == 'self':
        def wrapper(self, response, **kwargs):
            # Bind the plain function to the spider instance so the
            # generator runs with the expected ``self``.
            callback = create_bound_method(method_or_func, self)

            genwrapper = RequestGenerator(callback, **kwargs)
            return genwrapper(response)
    else:
        def wrapper(response, **kwargs):
            genwrapper = RequestGenerator(method_or_func, **kwargs)
            return genwrapper(response)

    return wraps(method_or_func)(wrapper)
Original file line number Diff line number Diff line change
@@ -1,91 +1,22 @@
import inspect
import types

from functools import partial, wraps
from six import create_bound_method
from functools import partial
from types import GeneratorType

from scrapy.http import Request
from scrapy.utils.spider import iterate_spider_output


def _get_args(method_or_func):
"""
Return method or function arguments.
class RequestGenerator(object):
"""This is the core class that wraps the callback and outputs the requests
one by one.
"""
try:
# Python 3.0+
args = list(inspect.signature(method_or_func).parameters.keys())
except AttributeError:
# Python 2.7
args = inspect.getargspec(method_or_func).args
return args


def inline_requests(method_or_func):
"""A decorator to use coroutine-like spider callbacks.
Example:
.. code:: python
class MySpider(Spider):
@inline_callbacks
def parse(self, response):
next_url = response.urjoin('?next')
try:
next_resp = yield Request(next_url)
except Exception as e:
self.logger.exception("An error occurred.")
return
else:
yield {"next_url": next_resp.url}
You must conform with the following conventions:
* The decorated method must be a spider method.
* The decorated method must use the ``yield`` keyword or return a
generator.
* The decorated method must accept ``response`` as the first argument.
* The decorated method must yield ``Request`` objects with neither
``callback`` nor ``errback`` set.
If your requests don't come back to the generator try setting the flag to
handle all http statuses:
.. code:: python
request.meta['handle_httpstatus_all'] = True
"""
args = _get_args(method_or_func)
if not args:
raise TypeError("Function must accept at least one argument.")
# XXX: hardcoded convention of 'self' as first argument for methods
if args[0] == 'self':
def wrapper(self, response, **kwargs):
callback = create_bound_method(method_or_func, self)

genwrapper = _RequestGenerator(callback, **kwargs)
return genwrapper(response)
else:
def wrapper(response, **kwargs):
genwrapper = _RequestGenerator(method_or_func, **kwargs)
return genwrapper(response)

return wraps(method_or_func)(wrapper)


class _RequestGenerator(object):

def __init__(self, callback, **kwargs):
    """Store the wrapped spider *callback* and any extra keyword
    arguments that will be forwarded to it on each call."""
    # The callback is expected to return a generator when invoked
    # (enforced later in __call__).
    self.callback = callback
    self.kwargs = kwargs

def __call__(self, response):
output = iterate_spider_output(self.callback(response=response, **self.kwargs))
if not isinstance(output, types.GeneratorType):
if not isinstance(output, GeneratorType):
raise ValueError("Callback must return a generator type")
return self._unwindGenerator(output)

Expand Down
12 changes: 12 additions & 0 deletions src/inline_requests/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import inspect


def get_args(method_or_func):
    """Returns method or function arguments."""
    try:
        # Python 3.0+: introspect via the signature object.
        signature = inspect.signature(method_or_func)
    except AttributeError:
        # Python 2.7: fall back to the legacy argspec API.
        return inspect.getargspec(method_or_func).args
    return list(signature.parameters.keys())

0 comments on commit 15fe8a9

Please sign in to comment.