diff --git a/CHANGES/4630.bugfix b/CHANGES/4630.bugfix new file mode 100644 index 00000000000..65d783be049 --- /dev/null +++ b/CHANGES/4630.bugfix @@ -0,0 +1 @@ +Handle the last CRLF correctly even if it is received via separate TCP segment. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index e6e88b9c5da..e512f4857d0 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -152,6 +152,7 @@ Julia Tsemusheva Julien Duponchelle Jungkook Park Junjie Tao +Junyeong Jeong Justas Trimailovas Justin Foo Justin Turner Arthur diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 48d3ab56885..462b03e4872 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -667,12 +667,23 @@ def feed_data(self, # we should get another \r\n otherwise # trailers needs to be skiped until \r\n\r\n if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS: - if chunk[:2] == SEP: + head = chunk[:2] + if head == SEP: # end of stream self.payload.feed_eof() return True, chunk[2:] - else: - self._chunk = ChunkState.PARSE_TRAILERS + # Both CR and LF, or only LF may not be received yet. It is + # expected that CRLF or LF will be shown at the very first + # byte next time, otherwise trailers should come. The last + # CRLF which marks the end of response might not be + # contained in the same TCP segment which delivered the + # size indicator. + if not head: + return False, b'' + if head == SEP[:1]: + self._chunk_tail = head + return False, b'' + self._chunk = ChunkState.PARSE_TRAILERS # read and discard trailer up to the CRLF terminator if self._chunk == ChunkState.PARSE_TRAILERS: diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 141eaba13ab..ea996657316 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -826,6 +826,67 @@ async def test_parse_chunked_payload_size_error(self, stream) -> None: assert isinstance(out.exception(), http_exceptions.TransferEncodingError) + async def test_parse_chunked_payload_split_end(self, protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\n') + p.feed_data(b'\r\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + + async def test_parse_chunked_payload_split_end2(self, protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\n\r') + p.feed_data(b'\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + + async def test_parse_chunked_payload_split_end_trailers(self, + protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\n') + p.feed_data(b'Content-MD5: 912ec803b2ce49e4a541068d495ab570\r\n') + p.feed_data(b'\r\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + + async def test_parse_chunked_payload_split_end_trailers2(self, + protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\n') + p.feed_data(b'Content-MD5: 912ec803b2ce49e4a541068d495ab570\r\n\r') + p.feed_data(b'\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + + async def test_parse_chunked_payload_split_end_trailers3(self, + protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\nContent-MD5: ') + p.feed_data(b'912ec803b2ce49e4a541068d495ab570\r\n\r\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + + async def test_parse_chunked_payload_split_end_trailers4(self, + protocol) -> None: + out = aiohttp.StreamReader(protocol, loop=None) + p = HttpPayloadParser(out, chunked=True) + p.feed_data(b'4\r\nasdf\r\n0\r\n' + b'C') + p.feed_data(b'ontent-MD5: 912ec803b2ce49e4a541068d495ab570\r\n\r\n') + + assert out.is_eof() + assert b'asdf' == b''.join(out._buffer) + async def test_http_payload_parser_length(self, stream) -> None: out = aiohttp.FlowControlDataQueue(stream, loop=asyncio.get_event_loop())