From f2afa2f054ba9e6c5d142e00233f0073925e7893 Mon Sep 17 00:00:00 2001 From: Sviatoslav Sydorenko Date: Thu, 25 Feb 2021 18:02:07 +0100 Subject: [PATCH] Fix how pure-Python HTTP parser interprets `//` --- CHANGES/5498.bugfix | 6 ++++++ aiohttp/http_parser.py | 14 +++++++++++++- tests/test_http_parser.py | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 CHANGES/5498.bugfix diff --git a/CHANGES/5498.bugfix b/CHANGES/5498.bugfix new file mode 100644 index 00000000000..c11630e8743 --- /dev/null +++ b/CHANGES/5498.bugfix @@ -0,0 +1,6 @@ +Fix the difference in how the pure-Python and the Cython-based +HTTP parsers construct a ``yarl.URL`` object for the HTTP request-target. + +Before this fix, the Python parser would turn the URI's absolute-path +for ``//some-path`` into ``/`` while the Cython code preserved it as +``//some-path``. Now, both do the latter. diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 9487378da05..1045b6c0926 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -522,6 +522,9 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: "Status line is too long", str(self.max_line_size), str(len(path)) ) + path_part, _hash_separator, url_fragment = path.partition("#") + path_part, _question_mark_separator, qs_part = path_part.partition("?") + # method if not METHRE.match(method): raise BadStatusLine(method) @@ -562,7 +565,16 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: compression, upgrade, chunked, - URL(path), + # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based + # NOTE: parser does, otherwise it results in the same + # NOTE: HTTP Request-Line input producing different + # NOTE: `yarl.URL()` objects + URL.build( + path=path_part, + query_string=qs_part, + fragment=url_fragment, + encoded=True, + ), ) diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 77c62addf6b..172d7bc30cf 100644 --- a/tests/test_http_parser.py +++ 
b/tests/test_http_parser.py @@ -530,6 +530,7 @@ def test_http_request_parser_two_slashes(parser: Any) -> None: assert msg.method == "GET" assert msg.path == "//path" + assert msg.url.path == "//path" assert msg.version == (1, 1) assert not msg.should_close assert msg.compression is None