From 345418419d6f39ce4c85b83b86d589e4fd398b43 Mon Sep 17 00:00:00 2001 From: Junyeong Jeong Date: Fri, 16 Oct 2020 18:09:21 +0900 Subject: [PATCH] Fix HttpPayloadParser dealing with chunked response (#4630) (#4846) * Parse the last CRLF of chunked response correctly (#4630) If the last CRLF or only the LF are received via separate TCP segment, HTTPPayloadParser misjudges that trailers should come after 0\r\n in the chunked response body. In this case, HttpPayloadParser starts waiting for trailers, but the only remaining data to be received is CRLF. Thus, HttpPayloadParser waits trailers indefinitely and this incurs TimeoutError in user code. However, if the connection is keep alive disabled, this problem is not reproduced because the server shutdown the connection explicitly after sending all data. If the connection is closed .feed_eof is called and it helps HttpPayloadParser finish its waiting. Co-authored-by: JustAnotherArchivist Co-authored-by: Sviatoslav Sydorenko Co-authored-by: Andrew Svetlov --- CHANGES/4630.bugfix | 1 + CONTRIBUTORS.txt | 1 + aiohttp/http_parser.py | 17 +++++++++-- tests/test_http_parser.py | 61 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 CHANGES/4630.bugfix diff --git a/CHANGES/4630.bugfix b/CHANGES/4630.bugfix new file mode 100644 index 00000000000..65d783be049 --- /dev/null +++ b/CHANGES/4630.bugfix @@ -0,0 +1 @@ +Handle the last CRLF correctly even if it is received via separate TCP segment. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index e6e88b9c5da..e512f4857d0 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -152,6 +152,7 @@ Julia Tsemusheva Julien Duponchelle Jungkook Park Junjie Tao +Junyeong Jeong Justas Trimailovas Justin Foo Justin Turner Arthur diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 48d3ab56885..462b03e4872 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -667,12 +667,23 @@ def feed_data(self, # we should get another \r\n otherwise # trailers needs to be skiped until \r\n\r\n if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS: - if chunk[:2] == SEP: + head = chunk[:2] + if head == SEP: # end of stream self.payload.feed_eof() return True, chunk[2:] - else: - self._chunk = ChunkState.PARSE_TRAILERS + # Both CR and LF, or only LF may not be received yet. It is + # expected that CRLF or LF will be shown at the very first + # byte next time, otherwise trailers should come. The last + # CRLF which marks the end of response might not be + # contained in the same TCP segment which delivered the + # size indicator. + if not head: + return False, b'' + if head == SEP[:1]: + self._chunk_tail = head + return False, b'' + self._chunk = ChunkState.PARSE_TRAILERS # read and discard trailer up to the CRLF terminator if self._chunk == ChunkState.PARSE_TRAILERS: diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 141eaba13ab..ea996657316 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -826,6 +826,67 @@ async def test_parse_chunked_payload_size_error(self, stream) -> None: assert isinstance(out.exception(), http_exceptions.TransferEncodingError) + async def test_parse_chunked_payload_split_end(self, protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\n') + p.feed_data(b'\r\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + + async def test_parse_chunked_payload_split_end2(self, protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\n\r') + p.feed_data(b'\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + + async def test_parse_chunked_payload_split_end_trailers(self, + protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\n') + p.feed_data(b'Content-MD5: 912ec803b2ce49e4a541068d495ab570\r\n') + p.feed_data(b'\r\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + + async def test_parse_chunked_payload_split_end_trailers2(self, + protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\n') + p.feed_data(b'Content-MD5: 912ec803b2ce49e4a541068d495ab570\r\n\r') + p.feed_data(b'\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + + async def test_parse_chunked_payload_split_end_trailers3(self, + protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\nContent-MD5: ') + p.feed_data(b'912ec803b2ce49e4a541068d495ab570\r\n\r\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + + async def test_parse_chunked_payload_split_end_trailers4(self, + protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\n' + b'C') + p.feed_data(b'ontent-MD5: 912ec803b2ce49e4a541068d495ab570\r\n\r\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + async def test_http_payload_parser_length(self, stream) -> None: out = aiohttp.FlowControlDataQueue(stream, loop=asyncio.get_event_loop())