Skip to content

Commit

Permalink
Fix inconsistency between Python and C http request parsers. (#4973)
Browse files Browse the repository at this point in the history
They both now correctly parse URLs containing pct-encoded sequences
and work with yarl 1.6.0.

Co-authored-by: Sviatoslav Sydorenko <wk.cvs.github@sydorenko.org.ua>
Co-authored-by: Andrew Svetlov <andrew.svetlov@gmail.com>
  • Loading branch information
3 people authored Oct 21, 2020
1 parent 967b1b9 commit d321923
Show file tree
Hide file tree
Showing 8 changed files with 114 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGES/4972.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix inconsistency between Python and C http request parsers in parsing pct-encoded URL.
1 change: 1 addition & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ Sergey Ninua
Sergey Skripnick
Serhii Charykov
Serhii Kostel
Serhiy Storchaka
Simon Kennedy
Sin-Woo Bang
Stanislas Plum
Expand Down
2 changes: 1 addition & 1 deletion aiohttp/_http_parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ cdef _parse_url(char* buf_data, size_t length):

return URL_build(scheme=schema,
user=user, password=password, host=host, port=port,
path=path, query=query, fragment=fragment)
path=path, query_string=query, fragment=fragment, encoded=True)
else:
raise InvalidURLError("invalid url {!r}".format(buf_data))
finally:
Expand Down
54 changes: 37 additions & 17 deletions aiohttp/web_urldispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

from typing_extensions import TypedDict
from yarl import URL
from yarl import __version__ as yarl_version # type: ignore

from . import hdrs
from .abc import AbstractMatchInfo, AbstractRouter, AbstractView
Expand Down Expand Up @@ -61,6 +62,8 @@
else:
BaseDict = dict

YARL_VERSION = tuple(map(int, yarl_version.split('.')[:2]))

HTTP_METHOD_RE = re.compile(r"^[0-9A-Za-z!#\$%&'\*\+\-\.\^_`\|~]+$")
ROUTE_RE = re.compile(r'(\{[_a-zA-Z][^{}]*(?:\{[^{}]*\}[^{}]*)*\})')
PATH_SEP = re.escape('/')
Expand Down Expand Up @@ -421,9 +424,9 @@ def __init__(self, path: str, *, name: Optional[str]=None) -> None:
if '{' in part or '}' in part:
raise ValueError("Invalid path '{}'['{}']".format(path, part))

path = URL.build(path=part).raw_path
formatter += path
pattern += re.escape(path)
part = _requote_path(part)
formatter += part
pattern += re.escape(part)

try:
compiled = re.compile(pattern)
Expand Down Expand Up @@ -451,7 +454,7 @@ def _match(self, path: str) -> Optional[Dict[str, str]]:
if match is None:
return None
else:
return {key: URL.build(path=value, encoded=True).path
return {key: _unquote_path(value)
for key, value in match.groupdict().items()}

def raw_match(self, path: str) -> bool:
Expand All @@ -462,9 +465,9 @@ def get_info(self) -> _InfoDict:
'pattern': self._pattern}

def url_for(self, **parts: str) -> URL:
url = self._formatter.format_map({k: URL.build(path=v).raw_path
url = self._formatter.format_map({k: _quote_path(v)
for k, v in parts.items()})
return URL.build(path=url)
return URL.build(path=url, encoded=True)

def __repr__(self) -> str:
name = "'" + self.name + "' " if self.name is not None else ""
Expand All @@ -478,7 +481,7 @@ def __init__(self, prefix: str, *, name: Optional[str]=None) -> None:
assert not prefix or prefix.startswith('/'), prefix
assert prefix in ('', '/') or not prefix.endswith('/'), prefix
super().__init__(name=name)
self._prefix = URL.build(path=prefix).raw_path
self._prefix = _requote_path(prefix)

@property
def canonical(self) -> str:
Expand Down Expand Up @@ -535,17 +538,17 @@ def url_for(self, *, filename: Union[str, Path], # type: ignore
append_version = self._append_version
if isinstance(filename, Path):
filename = str(filename)
while filename.startswith('/'):
filename = filename[1:]
filename = '/' + filename
filename = filename.lstrip('/')

url = URL.build(path=self._prefix, encoded=True)
# filename is not encoded
url = URL.build(path=self._prefix + filename)
if YARL_VERSION < (1, 6):
url = url / filename.replace('%', '%25')
else:
url = url / filename

if append_version:
try:
if filename.startswith('/'):
filename = filename[1:]
filepath = self._directory.joinpath(filename).resolve()
if not self._follow_symlinks:
filepath.relative_to(self._directory)
Expand Down Expand Up @@ -592,8 +595,7 @@ async def resolve(self, request: Request) -> _Resolve:
if method not in allowed_methods:
return None, allowed_methods

match_dict = {'filename': URL.build(path=path[len(self._prefix)+1:],
encoded=True).path}
match_dict = {'filename': _unquote_path(path[len(self._prefix)+1:])}
return (UrlMappingMatchInfo(match_dict, self._routes[method]),
allowed_methods)

Expand Down Expand Up @@ -1032,8 +1034,7 @@ def add_resource(self, path: str, *,
if resource.name == name and resource.raw_match(path):
return cast(Resource, resource)
if not ('{' in path or '}' in path or ROUTE_RE.search(path)):
url = URL.build(path=path)
resource = PlainResource(url.raw_path, name=name)
resource = PlainResource(_requote_path(path), name=name)
self.register_resource(resource)
return resource
resource = DynamicResource(path, name=name)
Expand Down Expand Up @@ -1152,3 +1153,22 @@ def add_routes(self,
for route_def in routes:
registered_routes.extend(route_def.register(self))
return registered_routes


def _quote_path(value: str) -> str:
if YARL_VERSION < (1, 6):
value = value.replace('%', '%25')
return URL.build(path=value, encoded=False).raw_path


def _unquote_path(value: str) -> str:
return URL.build(path=value, encoded=True).path


def _requote_path(value: str) -> str:
# Quote non-ascii characters and other characters which must be quoted,
# but preserve existing %-sequences.
result = _quote_path(value)
if '%' in value:
result = result.replace('%25', '%')
return result
2 changes: 1 addition & 1 deletion requirements/ci-wheel.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pytest==6.1.1
pytest-cov==2.10.1
pytest-mock==3.3.1
typing_extensions==3.7.4.3
yarl==1.4.2
yarl==1.6.1

# Using PEP 508 env markers to control dependency on runtimes:

Expand Down
50 changes: 50 additions & 0 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import asyncio
from unittest import mock
from urllib.parse import quote

import pytest
from multidict import CIMultiDict
Expand Down Expand Up @@ -787,6 +788,55 @@ def test_url_parse_non_strict_mode(parser) -> None:
assert payload.is_eof()


@pytest.mark.parametrize(
('uri', 'path', 'query', 'fragment'),
[
('/path%23frag', '/path#frag', {}, ''),
('/path%2523frag', '/path%23frag', {}, ''),
('/path?key=value%23frag', '/path', {'key': 'value#frag'}, ''),
('/path?key=value%2523frag', '/path', {'key': 'value%23frag'}, ''),
('/path#frag%20', '/path', {}, 'frag '),
('/path#frag%2520', '/path', {}, 'frag%20'),
]
)
def test_parse_uri_percent_encoded(parser, uri, path, query, fragment) -> None:
text = ('GET %s HTTP/1.1\r\n\r\n' % (uri,)).encode()
messages, upgrade, tail = parser.feed_data(text)
msg = messages[0][0]

assert msg.path == uri
assert msg.url == URL(uri)
assert msg.url.path == path
assert msg.url.query == query
assert msg.url.fragment == fragment


def test_parse_uri_utf8(parser) -> None:
text = ('GET /путь?ключ=знач#фраг HTTP/1.1\r\n\r\n').encode()
messages, upgrade, tail = parser.feed_data(text)
msg = messages[0][0]

assert msg.path == '/путь?ключ=знач#фраг'
assert msg.url.path == '/путь'
assert msg.url.query == {'ключ': 'знач'}
assert msg.url.fragment == 'фраг'


def test_parse_uri_utf8_percent_encoded(parser) -> None:
text = (
'GET %s HTTP/1.1\r\n\r\n' %
quote('/путь?ключ=знач#фраг', safe='/?=#')
).encode()
messages, upgrade, tail = parser.feed_data(text)
msg = messages[0][0]

assert msg.path == quote('/путь?ключ=знач#фраг', safe='/?=#')
assert msg.url == URL('/путь?ключ=знач#фраг')
assert msg.url.path == '/путь'
assert msg.url.query == {'ключ': 'знач'}
assert msg.url.fragment == 'фраг'


@pytest.mark.skipif('HttpRequestParserC' not in dir(aiohttp.http_parser),
reason="C based HTTP parser not available")
def test_parse_bad_method_for_c_parser_raises(loop, protocol):
Expand Down
26 changes: 22 additions & 4 deletions tests/test_urldispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,20 @@ def test_add_static_append_version_not_follow_symlink(router,
assert '/st/append_version_symlink/data.unknown_mime_type' == str(url)


def test_add_static_quoting(router) -> None:
resource = router.add_static('/пре %2Fфикс',
pathlib.Path(aiohttp.__file__).parent,
name='static')
assert router['static'] is resource
url = resource.url_for(filename='/1 2/файл%2F.txt')
assert url.path == '/пре /фикс/1 2/файл%2F.txt'
assert str(url) == (
'/%D0%BF%D1%80%D0%B5%20%2F%D1%84%D0%B8%D0%BA%D1%81'
'/1%202/%D1%84%D0%B0%D0%B9%D0%BB%252F.txt'
)
assert len(resource) == 2


def test_plain_not_match(router) -> None:
handler = make_handler()
router.add_route('GET', '/get/path', handler, name='name')
Expand Down Expand Up @@ -629,10 +643,14 @@ def test_route_dynamic_with_regex(router) -> None:

def test_route_dynamic_quoting(router) -> None:
handler = make_handler()
route = router.add_route('GET', r'/{arg}', handler)

url = route.url_for(arg='1 2/текст')
assert '/1%202/%D1%82%D0%B5%D0%BA%D1%81%D1%82' == str(url)
route = router.add_route('GET', r'/пре %2Fфикс/{arg}', handler)

url = route.url_for(arg='1 2/текст%2F')
assert url.path == '/пре /фикс/1 2/текст%2F'
assert str(url) == (
'/%D0%BF%D1%80%D0%B5%20%2F%D1%84%D0%B8%D0%BA%D1%81'
'/1%202/%D1%82%D0%B5%D0%BA%D1%81%D1%82%252F'
)


async def test_regular_match_info(router) -> None:
Expand Down
2 changes: 1 addition & 1 deletion vendor/http-parser
Submodule http-parser updated 4 files
+6 −6 Makefile
+44 −150 http_parser.c
+5 −12 http_parser.h
+4 −132 test.c

0 comments on commit d321923

Please sign in to comment.