diff --git a/docs/topics/downloader-middleware.rst b/docs/topics/downloader-middleware.rst
index e201260f964..614e4fff6d3 100644
--- a/docs/topics/downloader-middleware.rst
+++ b/docs/topics/downloader-middleware.rst
@@ -634,8 +634,8 @@ settings (see the settings documentation for more info):
 
 .. reqmeta:: dont_redirect
 
-If :attr:`Request.meta <scrapy.http.Request.meta>` contains the
-``dont_redirect`` key, the request will be ignored by this middleware.
+If :attr:`Request.meta <scrapy.http.Request.meta>` has the ``dont_redirect``
+key set to True, the request will be ignored by this middleware.
 
 
 RedirectMiddleware settings
@@ -732,8 +732,8 @@ to indicate server overload, which would be something we want to retry.
 
 .. reqmeta:: dont_retry
 
-If :attr:`Request.meta <scrapy.http.Request.meta>` contains the ``dont_retry``
-key, the request will be ignored by this middleware.
+If :attr:`Request.meta <scrapy.http.Request.meta>` has the ``dont_retry`` key
+set to True, the request will be ignored by this middleware.
 
 RetryMiddleware Settings
 ~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/topics/request-response.rst b/docs/topics/request-response.rst
index 192f4caeed6..4723565e7ad 100644
--- a/docs/topics/request-response.rst
+++ b/docs/topics/request-response.rst
@@ -83,7 +83,7 @@ Request objects
         cookies for that domain and will be sent again in future requests. That's
         the typical behaviour of any regular web browser. However, if, for some
         reason, you want to avoid merging with existing cookies you can instruct
-        Scrapy to do so by setting the ``dont_merge_cookies`` key in the
+        Scrapy to do so by setting the ``dont_merge_cookies`` key to True in the
         :attr:`Request.meta`.
 
         Example of request without merging cookies::
@@ -102,7 +102,7 @@ Request objects
 
     :param priority: the priority of this request (defaults to ``0``).
        The priority is used by the scheduler to define the order used to process
-       requests. Requests with a higher priority value will execute earlier.
+       requests. Requests with a higher priority value will execute earlier. Negative values are allowed in order to indicate relatively low priority.
     :type priority: int
 
diff --git a/scrapy/contrib/downloadermiddleware/cookies.py b/scrapy/contrib/downloadermiddleware/cookies.py
index b249f329d42..4b63b8112c5 100644
--- a/scrapy/contrib/downloadermiddleware/cookies.py
+++ b/scrapy/contrib/downloadermiddleware/cookies.py
@@ -22,7 +22,7 @@ def from_crawler(cls, crawler):
         return cls(crawler.settings.getbool('COOKIES_DEBUG'))
 
     def process_request(self, request, spider):
-        if 'dont_merge_cookies' in request.meta:
+        if request.meta.get('dont_merge_cookies', False):
             return
 
         cookiejarkey = request.meta.get("cookiejar")
@@ -37,7 +37,7 @@ def process_request(self, request, spider):
         self._debug_cookie(request, spider)
 
     def process_response(self, request, response, spider):
-        if 'dont_merge_cookies' in request.meta:
+        if request.meta.get('dont_merge_cookies', False):
             return response
 
         # extract cookies from Set-Cookie and drop invalid/expired cookies
diff --git a/scrapy/contrib/downloadermiddleware/redirect.py b/scrapy/contrib/downloadermiddleware/redirect.py
index 6a42987e144..cfb10d4dba4 100644
--- a/scrapy/contrib/downloadermiddleware/redirect.py
+++ b/scrapy/contrib/downloadermiddleware/redirect.py
@@ -52,7 +52,7 @@ class RedirectMiddleware(BaseRedirectMiddleware):
     """Handle redirection of requests based on response status and meta-refresh html tag"""
 
     def process_response(self, request, response, spider):
-        if 'dont_redirect' in request.meta:
+        if request.meta.get('dont_redirect', False):
             return response
 
         if request.method == 'HEAD':
@@ -86,7 +86,7 @@ def __init__(self, settings):
                                          settings.getint('METAREFRESH_MAXDELAY'))
 
     def process_response(self, request, response, spider):
-        if 'dont_redirect' in request.meta or request.method == 'HEAD' or \
+        if request.meta.get('dont_redirect', False) or request.method == 'HEAD' or \
                 not isinstance(response, HtmlResponse):
             return response
 
diff --git a/scrapy/contrib/downloadermiddleware/retry.py b/scrapy/contrib/downloadermiddleware/retry.py
index 9cc54ed4897..f72f39431d5 100644
--- a/scrapy/contrib/downloadermiddleware/retry.py
+++ b/scrapy/contrib/downloadermiddleware/retry.py
@@ -50,7 +50,7 @@ def from_crawler(cls, crawler):
         return cls(crawler.settings)
 
     def process_response(self, request, response, spider):
-        if 'dont_retry' in request.meta:
+        if request.meta.get('dont_retry', False):
             return response
         if response.status in self.retry_http_codes:
             reason = response_status_message(response.status)
@@ -59,8 +59,8 @@ def process_response(self, request, response, spider):
 
     def process_exception(self, request, exception, spider):
         if isinstance(exception, self.EXCEPTIONS_TO_RETRY) \
-                and 'dont_retry' not in request.meta:
-            return self._retry(request, exception, spider)
+                and not request.meta.get('dont_retry', False):
+            return self._retry(request, exception, spider)
 
     def _retry(self, request, reason, spider):
         retries = request.meta.get('retry_times', 0) + 1
diff --git a/tests/test_downloadermiddleware_cookies.py b/tests/test_downloadermiddleware_cookies.py
index 35a86b8ce53..ffa3a550c71 100644
--- a/tests/test_downloadermiddleware_cookies.py
+++ b/tests/test_downloadermiddleware_cookies.py
@@ -52,10 +52,16 @@ def test_dont_merge_cookies(self):
         res = Response('http://scrapytest.org/dontmerge', headers={'Set-Cookie': 'dont=mergeme; path=/'})
         assert self.mw.process_response(req, res, self.spider) is res
 
+        # check that cookies are merged back
         req = Request('http://scrapytest.org/mergeme')
         assert self.mw.process_request(req, self.spider) is None
         self.assertEquals(req.headers.get('Cookie'), 'C1=value1')
 
+        # check that cookies are merged when dont_merge_cookies is passed as 0
+        req = Request('http://scrapytest.org/mergeme', meta={'dont_merge_cookies': 0})
+        assert self.mw.process_request(req, self.spider) is None
+        self.assertEquals(req.headers.get('Cookie'), 'C1=value1')
+
     def test_complex_cookies(self):
         # merge some cookies into jar
         cookies = [{'name': 'C1', 'value': 'value1', 'path': '/foo', 'domain': 'scrapytest.org'},
diff --git a/tests/test_downloadermiddleware_redirect.py b/tests/test_downloadermiddleware_redirect.py
index 8b871c7bc0c..9673d4594c5 100644
--- a/tests/test_downloadermiddleware_redirect.py
+++ b/tests/test_downloadermiddleware_redirect.py
@@ -50,6 +50,15 @@ def test_dont_redirect(self):
         assert isinstance(r, Response)
         assert r is rsp
 
+        # Test that the request is not ignored when dont_redirect is set to False
+        req = Request(url, meta={'dont_redirect': False})
+        rsp = Response(url2, status=200)
+
+        r = self.mw.process_response(req, rsp, self.spider)
+        assert isinstance(r, Response)
+        assert r is rsp
+
+
     def test_redirect_302(self):
         url = 'http://www.example.com/302'
         url2 = 'http://www.example.com/redirected2'
diff --git a/tests/test_downloadermiddleware_retry.py b/tests/test_downloadermiddleware_retry.py
index 4c771f18e9d..166c2bff614 100644
--- a/tests/test_downloadermiddleware_retry.py
+++ b/tests/test_downloadermiddleware_retry.py
@@ -40,6 +40,14 @@ def test_dont_retry(self):
         r = self.mw.process_response(req, rsp, self.spider)
         assert r is rsp
 
+        # Test that the request is not ignored when dont_retry is set to False
+        req = Request('http://www.scrapytest.org/503', meta={'dont_retry': False})
+        rsp = Response('http://www.scrapytest.org/503')
+
+        # first retry
+        r = self.mw.process_response(req, rsp, self.spider)
+        assert r is rsp
+
     def test_dont_retry_exc(self):
         req = Request('http://www.scrapytest.org/503', meta={'dont_retry': True})
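
Not part of the patch above, but a minimal usage sketch of the behaviour it enforces: after this change the middlewares only stand down when the meta key carries a truthy value, so passing False or 0 leaves redirect, retry and cookie handling active. The spider name, URLs and callback below are made up for illustration, and the import paths assume the same contrib-era Scrapy layout as the patched files.

    from scrapy.http import Request
    from scrapy.spider import Spider  # Spider location assumed for this Scrapy version


    class MetaFlagsSpider(Spider):
        """Hypothetical spider, only to illustrate the dont_* meta flags."""
        name = 'meta_flags_example'

        def start_requests(self):
            # Truthy values disable the middlewares for this request:
            # redirects are not followed and failed downloads are not retried.
            yield Request('http://www.example.com/no-follow',
                          meta={'dont_redirect': True, 'dont_retry': True},
                          callback=self.parse)
            # Falsy values (False, 0) no longer disable anything after this change;
            # the request is handled exactly like one without the keys at all.
            yield Request('http://www.example.com/follow',
                          meta={'dont_redirect': False, 'dont_merge_cookies': 0},
                          callback=self.parse)

        def parse(self, response):
            pass

The second request behaves this way precisely because the middlewares now read the flags with request.meta.get(key, False) instead of checking key in request.meta.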