Merge pull request scrapy#846 from rocioar/master

fix dont_merge_cookies bad behaviour when set to false on meta
DamonGuo · Aug 18, 2014 · d684eca · d684eca
2 parents 94d00b2 + 51b0bd2
commit d684eca
Show file tree

Hide file tree

Showing 8 changed files with 36 additions and 13 deletions.
diff --git a/docs/topics/downloader-middleware.rst b/docs/topics/downloader-middleware.rst
@@ -634,8 +634,8 @@ settings (see the settings documentation for more info):
 
 .. reqmeta:: dont_redirect
 
-If :attr:`Request.meta <scrapy.http.Request.meta>` contains the
-``dont_redirect`` key, the request will be ignored by this middleware.
+If :attr:`Request.meta <scrapy.http.Request.meta>` has ``dont_redirect``
+key set to True, the request will be ignored by this middleware.
 
 
 RedirectMiddleware settings
@@ -732,8 +732,8 @@ to indicate server overload, which would be something we want to retry.
 
 .. reqmeta:: dont_retry
 
-If :attr:`Request.meta <scrapy.http.Request.meta>` contains the ``dont_retry``
-key, the request will be ignored by this middleware.
+If :attr:`Request.meta <scrapy.http.Request.meta>` has ``dont_retry`` key
+set to True, the request will be ignored by this middleware.
 
 RetryMiddleware Settings
 ~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/docs/topics/request-response.rst b/docs/topics/request-response.rst
@@ -83,7 +83,7 @@ Request objects
         cookies for that domain and will be sent again in future requests. That's
         the typical behaviour of any regular web browser. However, if, for some
         reason, you want to avoid merging with existing cookies you can instruct
-        Scrapy to do so by setting the ``dont_merge_cookies`` key in the
+        Scrapy to do so by setting the ``dont_merge_cookies`` key to True in the
         :attr:`Request.meta`.
 
         Example of request without merging cookies::
@@ -102,7 +102,7 @@ Request objects
 
     :param priority: the priority of this request (defaults to ``0``).
        The priority is used by the scheduler to define the order used to process
-       requests.  Requests with a higher priority value will execute earlier.  
+       requests.  Requests with a higher priority value will execute earlier.
        Negative values are allowed in order to indicate relatively low-priority.
     :type priority: int
 

diff --git a/scrapy/contrib/downloadermiddleware/cookies.py b/scrapy/contrib/downloadermiddleware/cookies.py
@@ -22,7 +22,7 @@ def from_crawler(cls, crawler):
         return cls(crawler.settings.getbool('COOKIES_DEBUG'))
 
     def process_request(self, request, spider):
-        if 'dont_merge_cookies' in request.meta:
+        if request.meta.get('dont_merge_cookies', False):
             return
 
         cookiejarkey = request.meta.get("cookiejar")
@@ -37,7 +37,7 @@ def process_request(self, request, spider):
         self._debug_cookie(request, spider)
 
     def process_response(self, request, response, spider):
-        if 'dont_merge_cookies' in request.meta:
+        if request.meta.get('dont_merge_cookies', False):
             return response
 
         # extract cookies from Set-Cookie and drop invalid/expired cookies

diff --git a/scrapy/contrib/downloadermiddleware/redirect.py b/scrapy/contrib/downloadermiddleware/redirect.py
@@ -52,7 +52,7 @@ class RedirectMiddleware(BaseRedirectMiddleware):
     """Handle redirection of requests based on response status and meta-refresh html tag"""
 
     def process_response(self, request, response, spider):
-        if 'dont_redirect' in request.meta:
+        if request.meta.get('dont_redirect', False):
             return response
 
         if request.method == 'HEAD':
@@ -86,7 +86,7 @@ def __init__(self, settings):
                                          settings.getint('METAREFRESH_MAXDELAY'))
 
     def process_response(self, request, response, spider):
-        if 'dont_redirect' in request.meta or request.method == 'HEAD' or \
+        if request.meta.get('dont_redirect', False) or request.method == 'HEAD' or \
                 not isinstance(response, HtmlResponse):
             return response
 

diff --git a/scrapy/contrib/downloadermiddleware/retry.py b/scrapy/contrib/downloadermiddleware/retry.py
@@ -50,7 +50,7 @@ def from_crawler(cls, crawler):
         return cls(crawler.settings)
 
     def process_response(self, request, response, spider):
-        if 'dont_retry' in request.meta:
+        if request.meta.get('dont_retry', False):
             return response
         if response.status in self.retry_http_codes:
             reason = response_status_message(response.status)
@@ -59,8 +59,8 @@ def process_response(self, request, response, spider):
 
     def process_exception(self, request, exception, spider):
         if isinstance(exception, self.EXCEPTIONS_TO_RETRY) \
-                and 'dont_retry' not in request.meta:
-            return self._retry(request, exception, spider)
+                and not request.meta.get('dont_retry', False):
+             return self._retry(request, exception, spider)
 
     def _retry(self, request, reason, spider):
         retries = request.meta.get('retry_times', 0) + 1

diff --git a/tests/test_downloadermiddleware_cookies.py b/tests/test_downloadermiddleware_cookies.py
@@ -52,10 +52,16 @@ def test_dont_merge_cookies(self):
         res = Response('http://scrapytest.org/dontmerge', headers={'Set-Cookie': 'dont=mergeme; path=/'})
         assert self.mw.process_response(req, res, self.spider) is res
 
+        # check that cookies are merged back
         req = Request('http://scrapytest.org/mergeme')
         assert self.mw.process_request(req, self.spider) is None
         self.assertEquals(req.headers.get('Cookie'), 'C1=value1')
 
+        # check that cookies are merged when dont_merge_cookies is passed as 0
+        req = Request('http://scrapytest.org/mergeme', meta={'dont_merge_cookies': 0})
+        assert self.mw.process_request(req, self.spider) is None
+        self.assertEquals(req.headers.get('Cookie'), 'C1=value1')
+
     def test_complex_cookies(self):
         # merge some cookies into jar
         cookies = [{'name': 'C1', 'value': 'value1', 'path': '/foo', 'domain': 'scrapytest.org'},

diff --git a/tests/test_downloadermiddleware_redirect.py b/tests/test_downloadermiddleware_redirect.py
@@ -50,6 +50,15 @@ def test_dont_redirect(self):
         assert isinstance(r, Response)
         assert r is rsp
 
+        # Test that it redirects when dont_redirect is False
+        req = Request(url, meta={'dont_redirect': False})
+        rsp = Response(url2, status=200)
+
+        r = self.mw.process_response(req, rsp, self.spider)
+        assert isinstance(r, Response)
+        assert r is rsp
+
+
     def test_redirect_302(self):
         url = 'http://www.example.com/302'
         url2 = 'http://www.example.com/redirected2'

diff --git a/tests/test_downloadermiddleware_retry.py b/tests/test_downloadermiddleware_retry.py
@@ -40,6 +40,14 @@ def test_dont_retry(self):
         r = self.mw.process_response(req, rsp, self.spider)
         assert r is rsp
 
+        # Test retry when dont_retry set to False
+        req = Request('http://www.scrapytest.org/503', meta={'dont_retry': False})
+        rsp = Response('http://www.scrapytest.org/503')
+
+        # first retry
+        r = self.mw.process_response(req, rsp, self.spider)
+        assert r is rsp
+
     def test_dont_retry_exc(self):
         req = Request('http://www.scrapytest.org/503', meta={'dont_retry': True})