Skip to content

Diffing server exception handling #185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 43 additions & 8 deletions web_monitoring/diffing_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
b'<?xml\\s[^>]*encoding=[\'"]([^\'"]+)[\'"].*\?>',
re.IGNORECASE)


client = tornado.httpclient.AsyncHTTPClient()


Expand All @@ -67,17 +66,35 @@ def get(self, differ):
try:
func = self.differs[differ]
except KeyError:
self.send_error(404)
self.send_error(404,
reason=f'Unknown diffing method: `{differ}`. '
f'You can get a list of '
f'supported differs from '
f'the `/` endpoint.')
return

# If params repeat, take last one. Decode bytes into unicode strings.
query_params = {k: v[-1].decode() for k, v in
self.request.arguments.items()}
a = query_params.pop('a')
b = query_params.pop('b')

try:
a = query_params.pop('a')
b = query_params.pop('b')
except KeyError:
self.send_error(
400,
reason='Malformed request. '
'You must provide a URL as the value '
'for both `a` and `b` query parameters.')
return
# Fetch server response for URLs a and b.
res_a, res_b = yield [client.fetch(a), client.fetch(b)]
res_a, res_b = yield [client.fetch(a, raise_error=False),
client.fetch(b, raise_error=False)]

try:
self.check_response_for_error(res_a)
self.check_response_for_error(res_b)
except tornado.httpclient.HTTPError:
return
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m pretty sure this is a logic error. In check_response_for_error(), we have:

if response.error is not None:
    try:
        response.rethrow()
    except (ValueError, IOError):
        # send back the right error response
        raise tornado.httpclient.HTTPError(0)
    # But we didn’t handle tornado.httpclient.HTTPError errors!

If response.rethrow() raised a tornado.httpclient.HTTPError, it won’t ever get an error response because we didn’t handle it inside check_response_for_error(), but we still stopped here in DiffHandler.get() as if we did. It basically stops here, but never sends the error message. You can try this by adding a test:

def test_a_is_404(self):
        response = self.fetch(f'/html_token?format=json&include=all&a=http://httpstat.us/404&b=https://example.org')
        print(f'STATUS:: {response.code}')
        print('BODY:::: "%s"' % response.body.decode('utf-8'))
        self.assertEqual(response.code, 400)
        self.json_check(response)

…which fails and prints out:

STATUS:: 200
BODY:::: ""


# Validate response bytes against hash, if provided.
for query_param, res in zip(('a_hash', 'b_hash'), (res_a, res_b)):
Expand All @@ -90,7 +107,7 @@ def get(self, differ):
actual_hash = hashlib.sha256(res.body).hexdigest()
if actual_hash != expected_hash:
self.send_error(
500, reason="Fetched content does not match hash.")
500, reason='Fetched content does not match hash.')
return

# TODO Add caching of fetched URIs.
Expand Down Expand Up @@ -124,6 +141,25 @@ def write_error(self, status_code, **kwargs):
self.set_status(response['code'])
self.finish(response)

def check_response_for_error(self, response):
# Check if the HTTP requests were successful and handle exceptions
if response.error is not None:
try:
response.rethrow()
except (ValueError, OSError, tornado.httpclient.HTTPError):
# Response code == 599 means that
# no HTTP response was received.
# In this case the error code should
# become 400 indicating that the error was
# raised because of a bad request parameter.
if response.code == 599:
self.send_error(
400, reason=str(response.error))
else:
self.send_error(
response.code, reason=str(response.error))
raise tornado.httpclient.HTTPError(0)


def _extract_encoding(headers, content):
encoding = None
Expand Down Expand Up @@ -226,7 +262,6 @@ def get(self):


def make_app():

class BoundDiffHandler(DiffHandler):
differs = DIFF_ROUTES

Expand Down
90 changes: 90 additions & 0 deletions web_monitoring/tests/test_diffing_server_exc_handling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import json
from tornado.testing import AsyncHTTPTestCase
import web_monitoring.diffing_server as df
from web_monitoring.diff_errors import UndecodableContentError


class DiffingServerExceptionHandlingTest(AsyncHTTPTestCase):

def get_app(self):
return df.make_app()

def test_invalid_url_a_format(self):
response = self.fetch(f'/html_token?format=json&include=all&'
f'a=example.org&b=https://example.org')
self.json_check(response)
self.assertEqual(response.code, 400)

def test_invalid_url_b_format(self):
response = self.fetch(f'/html_token?format=json&include=all&'
f'a=https://example.org&b=example.org')
self.json_check(response)
self.assertEqual(response.code, 400)

def test_invalid_diffing_method(self):
response = self.fetch(f'/non_existing?format=json&include=all&'
f'a=example.org&b=https://example.org')
self.json_check(response)
self.assertEqual(response.code, 404)

def test_missing_url_a(self):
response = self.fetch(f'/html_token?format=json&include=all&'
f'b=https://example.org')
self.json_check(response)
self.assertEqual(response.code, 400)

def test_missing_url_b(self):
response = self.fetch(f'/html_token?format=json&include=all&'
f'a=https://example.org')
self.json_check(response)
self.assertEqual(response.code, 400)

def test_not_reachable_url_a(self):
response = self.fetch(f'/html_token?format=json&include=all&'
f'a=https://eeexample.org&b=https://example.org')
self.json_check(response)
self.assertEqual(response.code, 400)

def test_not_reachable_url_b(self):
response = self.fetch(f'/html_token?format=json&include=all&'
f'a=https://example.org&b=https://eeexample.org')
self.json_check(response)
self.assertEqual(response.code, 400)

def test_missing_params_caller_func(self):
response = self.fetch('http://example.org/')
with self.assertRaises(KeyError):
df.caller(mock_diffing_method, response, response)

def test_undecodable_content(self):
response = self.fetch('https://www.cl.cam.ac.uk/~mgk25/'
'ucs/examples/UTF-8-test.txt')
with self.assertRaises(UndecodableContentError):
df._decode_body(response, 'a', False)

def test_fetch_undecodable_content(self):
response = self.fetch(f'/html_token?format=json&include=all&'
f'a=https://example.org&'
f'b=https://www.cl.cam.ac.uk'
f'/~mgk25/ucs/examples/UTF-8-test.txt')
self.json_check(response)
self.assertEqual(response.code, 422)

def json_check(self, response):
json_header = response.headers.get('Content-Type').split(';')
self.assertEqual(json_header[0], 'application/json')

json_response = json.loads(response.body)
self.assertTrue(isinstance(json_response['code'], int))
self.assertTrue(isinstance(json_response['error'], str))

def test_a_is_404(self):
response = self.fetch(f'/html_token?format=json&include=all'
f'&a=http://httpstat.us/404'
f'&b=https://example.org')
self.assertEqual(response.code, 404)
self.json_check(response)


def mock_diffing_method(c_body):
return