From 693f2cef79156e7d1bd33419520bb31fc5dc8016 Mon Sep 17 00:00:00 2001 From: Junyeong Jeong Date: Wed, 10 Jun 2020 16:33:00 +0900 Subject: [PATCH] Parse the last CRLF of chunked response correctly (#4630) If the last CRLF, or only its LF, is received in a separate TCP segment, HttpPayloadParser misjudges that trailers should come after 0\r\n in the chunked response body. In this case, HttpPayloadParser starts waiting for trailers, but the only remaining data to be received is the CRLF. Thus, HttpPayloadParser waits for trailers indefinitely, which causes a TimeoutError in user code. However, if keep-alive is disabled on the connection, the problem does not reproduce because the server shuts down the connection explicitly after sending all data. When the connection is closed, .feed_eof is called, which lets HttpPayloadParser stop waiting. --- CHANGES/4630.bugfix | 1 + CONTRIBUTORS.txt | 1 + aiohttp/http_parser.py | 17 ++++++++++++++--- 3 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 CHANGES/4630.bugfix diff --git a/CHANGES/4630.bugfix b/CHANGES/4630.bugfix new file mode 100644 index 00000000000..65d783be049 --- /dev/null +++ b/CHANGES/4630.bugfix @@ -0,0 +1 @@ +Handle the last CRLF correctly even if it is received via separate TCP segment. 
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index b40c107f6cd..f95ef97f8bd 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -153,6 +153,7 @@ Julia Tsemusheva Julien Duponchelle Jungkook Park Junjie Tao +Junyeong Jeong Justas Trimailovas Justin Foo Justin Turner Arthur diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 64e4b30c22e..c4ccf6af10b 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -668,12 +668,23 @@ def feed_data(self, # we should get another \r\n otherwise # trailers needs to be skiped until \r\n\r\n if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS: - if chunk[:2] == SEP: + head = chunk[:2] + if head == SEP: # end of stream self.payload.feed_eof() return True, chunk[2:] - else: - self._chunk = ChunkState.PARSE_TRAILERS + # Both CR and LF, or only LF may not be received yet. It is + # expected that CRLF or LF will be shown at the very first + # byte next time, otherwise trailers should come. The last + # CRLF which marks the end of response might not be + # contained in the same TCP segment which delivered the + # size indicator. + if not head: + return False, b'' + if head == SEP[:1]: + self._chunk_tail = head + return False, b'' + self._chunk = ChunkState.PARSE_TRAILERS # read and discard trailer up to the CRLF terminator if self._chunk == ChunkState.PARSE_TRAILERS: