Skip to content

Commit

Permalink
Merge pull request #4694 from Jgaldos/improve-httpstatus-all-meta
Browse files Browse the repository at this point in the history
Improve http status all on http error middleware
  • Loading branch information
kmike authored Apr 1, 2021
2 parents cc095aa + a41c205 commit f0c8d31
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 2 deletions.
3 changes: 2 additions & 1 deletion docs/topics/spider-middleware.rst
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,8 @@ this::
The ``handle_httpstatus_list`` key of :attr:`Request.meta
<scrapy.http.Request.meta>` can also be used to specify which response codes to
allow on a per-request basis. You can also set the meta key ``handle_httpstatus_all``
to ``True`` if you want to allow any response code for a request.
to ``True`` if you want to allow any response code for a request, or to ``False`` to
explicitly disable it, so that the response is filtered as if the key were not set.

Keep in mind, however, that it's usually a bad idea to handle non-200
responses, unless you really know what you're doing.
Expand Down
2 changes: 1 addition & 1 deletion scrapy/spidermiddlewares/httperror.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def process_spider_input(self, response, spider):
if 200 <= response.status < 300: # common case
return
meta = response.meta
if 'handle_httpstatus_all' in meta:
if meta.get('handle_httpstatus_all', False):
return
if 'handle_httpstatus_list' in meta:
allowed_statuses = meta['handle_httpstatus_list']
Expand Down
13 changes: 13 additions & 0 deletions tests/test_spidermiddleware_httperror.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,19 @@ def test_meta_overrides_settings(self):
self.assertIsNone(self.mw.process_spider_input(res404, self.spider))
self.assertRaises(HttpError, self.mw.process_spider_input, res402, self.spider)

def test_httperror_allow_all_false(self):
    """Check that ``handle_httpstatus_all`` is honoured by value, not by presence.

    The ``res404`` fixture, attached to a request whose meta sets the key to
    ``False``, must make the middleware raise ``HttpError``; the ``res402``
    fixture, attached to a request whose meta sets the key to ``True``, must
    be let through (``process_spider_input`` returns ``None``).
    """
    middleware = HttpErrorMiddleware.from_crawler(get_crawler(_HttpErrorSpider))

    # Response whose request explicitly switches the "allow all" flag off.
    rejected = self.res404.copy()
    rejected.request = Request('http://scrapytest.org', meta={'handle_httpstatus_all': False})

    # Response whose request switches the "allow all" flag on.
    accepted = self.res402.copy()
    accepted.request = Request('http://scrapytest.org', meta={'handle_httpstatus_all': True})

    self.assertRaises(HttpError, middleware.process_spider_input, rejected, self.spider)
    self.assertIsNone(middleware.process_spider_input(accepted, self.spider))


class TestHttpErrorMiddlewareIntegrational(TrialTestCase):
def setUp(self):
Expand Down

0 comments on commit f0c8d31

Please sign in to comment.