Skip to content

Commit

Permalink
fix behaviour of dont_* keys in Request.meta; add docs and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
rocioar committed Aug 15, 2014
1 parent 8fece4b commit 51b0bd2
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 13 deletions.
8 changes: 4 additions & 4 deletions docs/topics/downloader-middleware.rst
Original file line number Diff line number Diff line change
Expand Up @@ -634,8 +634,8 @@ settings (see the settings documentation for more info):

.. reqmeta:: dont_redirect

If :attr:`Request.meta <scrapy.http.Request.meta>` contains the
``dont_redirect`` key, the request will be ignored by this middleware.
If :attr:`Request.meta <scrapy.http.Request.meta>` has the ``dont_redirect``
key set to ``True``, the request will be ignored by this middleware.


RedirectMiddleware settings
Expand Down Expand Up @@ -732,8 +732,8 @@ to indicate server overload, which would be something we want to retry.

.. reqmeta:: dont_retry

If :attr:`Request.meta <scrapy.http.Request.meta>` contains the ``dont_retry``
key, the request will be ignored by this middleware.
If :attr:`Request.meta <scrapy.http.Request.meta>` has the ``dont_retry`` key
set to ``True``, the request will be ignored by this middleware.

RetryMiddleware Settings
~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
4 changes: 2 additions & 2 deletions docs/topics/request-response.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ Request objects
cookies for that domain and will be sent again in future requests. That's
the typical behaviour of any regular web browser. However, if, for some
reason, you want to avoid merging with existing cookies you can instruct
Scrapy to do so by setting the ``dont_merge_cookies`` key in the
Scrapy to do so by setting the ``dont_merge_cookies`` key to ``True`` in the
:attr:`Request.meta`.

Example of request without merging cookies::
Expand All @@ -102,7 +102,7 @@ Request objects

:param priority: the priority of this request (defaults to ``0``).
The priority is used by the scheduler to define the order used to process
requests. Requests with a higher priority value will execute earlier.
requests. Requests with a higher priority value will execute earlier.
Negative values are allowed in order to indicate relatively low-priority.
:type priority: int

Expand Down
4 changes: 2 additions & 2 deletions scrapy/contrib/downloadermiddleware/cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def from_crawler(cls, crawler):
return cls(crawler.settings.getbool('COOKIES_DEBUG'))

def process_request(self, request, spider):
if 'dont_merge_cookies' in request.meta:
if request.meta.get('dont_merge_cookies', False):
return

cookiejarkey = request.meta.get("cookiejar")
Expand All @@ -37,7 +37,7 @@ def process_request(self, request, spider):
self._debug_cookie(request, spider)

def process_response(self, request, response, spider):
if 'dont_merge_cookies' in request.meta:
if request.meta.get('dont_merge_cookies', False):
return response

# extract cookies from Set-Cookie and drop invalid/expired cookies
Expand Down
4 changes: 2 additions & 2 deletions scrapy/contrib/downloadermiddleware/redirect.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class RedirectMiddleware(BaseRedirectMiddleware):
"""Handle redirection of requests based on response status and meta-refresh html tag"""

def process_response(self, request, response, spider):
if 'dont_redirect' in request.meta:
if request.meta.get('dont_redirect', False):
return response

if request.method == 'HEAD':
Expand Down Expand Up @@ -86,7 +86,7 @@ def __init__(self, settings):
settings.getint('METAREFRESH_MAXDELAY'))

def process_response(self, request, response, spider):
if 'dont_redirect' in request.meta or request.method == 'HEAD' or \
if request.meta.get('dont_redirect', False) or request.method == 'HEAD' or \
not isinstance(response, HtmlResponse):
return response

Expand Down
6 changes: 3 additions & 3 deletions scrapy/contrib/downloadermiddleware/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def from_crawler(cls, crawler):
return cls(crawler.settings)

def process_response(self, request, response, spider):
if 'dont_retry' in request.meta:
if request.meta.get('dont_retry', False):
return response
if response.status in self.retry_http_codes:
reason = response_status_message(response.status)
Expand All @@ -59,8 +59,8 @@ def process_response(self, request, response, spider):

def process_exception(self, request, exception, spider):
if isinstance(exception, self.EXCEPTIONS_TO_RETRY) \
and 'dont_retry' not in request.meta:
return self._retry(request, exception, spider)
and not request.meta.get('dont_retry', False):
return self._retry(request, exception, spider)

def _retry(self, request, reason, spider):
retries = request.meta.get('retry_times', 0) + 1
Expand Down
6 changes: 6 additions & 0 deletions tests/test_downloadermiddleware_cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,16 @@ def test_dont_merge_cookies(self):
res = Response('http://scrapytest.org/dontmerge', headers={'Set-Cookie': 'dont=mergeme; path=/'})
assert self.mw.process_response(req, res, self.spider) is res

# check that cookies are merged back
req = Request('http://scrapytest.org/mergeme')
assert self.mw.process_request(req, self.spider) is None
self.assertEquals(req.headers.get('Cookie'), 'C1=value1')

# check that cookies are merged when dont_merge_cookies is passed as 0
req = Request('http://scrapytest.org/mergeme', meta={'dont_merge_cookies': 0})
assert self.mw.process_request(req, self.spider) is None
self.assertEquals(req.headers.get('Cookie'), 'C1=value1')

def test_complex_cookies(self):
# merge some cookies into jar
cookies = [{'name': 'C1', 'value': 'value1', 'path': '/foo', 'domain': 'scrapytest.org'},
Expand Down
9 changes: 9 additions & 0 deletions tests/test_downloadermiddleware_redirect.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ def test_dont_redirect(self):
assert isinstance(r, Response)
assert r is rsp

# Test that a non-redirect response passes through unchanged when dont_redirect is False
req = Request(url, meta={'dont_redirect': False})
rsp = Response(url2, status=200)

r = self.mw.process_response(req, rsp, self.spider)
assert isinstance(r, Response)
assert r is rsp


def test_redirect_302(self):
url = 'http://www.example.com/302'
url2 = 'http://www.example.com/redirected2'
Expand Down
8 changes: 8 additions & 0 deletions tests/test_downloadermiddleware_retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ def test_dont_retry(self):
r = self.mw.process_response(req, rsp, self.spider)
assert r is rsp

# Test retry when dont_retry set to False
req = Request('http://www.scrapytest.org/503', meta={'dont_retry': False})
rsp = Response('http://www.scrapytest.org/503')

# first retry
r = self.mw.process_response(req, rsp, self.spider)
assert r is rsp

def test_dont_retry_exc(self):
req = Request('http://www.scrapytest.org/503', meta={'dont_retry': True})

Expand Down

0 comments on commit 51b0bd2

Please sign in to comment.