Skip to content

Commit

Permalink
Fix HttpPayloadParser dealing with chunked response (#4630) (#4846)
Browse files Browse the repository at this point in the history
* Parse the last CRLF of chunked response correctly (#4630)

If the last CRLF or only the LF are received via separate TCP segment,
HTTPPayloadParser misjudges that trailers should come after 0\r\n in the
chunked response body.

In this case, HttpPayloadParser starts waiting for trailers, but the only
remaining data to be received is CRLF. Thus, HttpPayloadParser waits trailers
indefinitely and this incurs TimeoutError in user code.

However, if the connection is keep alive disabled, this problem is not
reproduced because the server shutdown the connection explicitly after sending
all data. If the connection is closed .feed_eof is called and it helps
HttpPayloadParser finish its waiting.

Co-authored-by: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Co-authored-by: Sviatoslav Sydorenko <wk.cvs.github@sydorenko.org.ua>
Co-authored-by: Andrew Svetlov <andrew.svetlov@gmail.com>
  • Loading branch information
4 people authored Oct 16, 2020
1 parent bf9e6a5 commit 3454184
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGES/4630.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Handle the last CRLF correctly even if it is received via separate TCP segment.
1 change: 1 addition & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ Julia Tsemusheva
Julien Duponchelle
Jungkook Park
Junjie Tao
Junyeong Jeong
Justas Trimailovas
Justin Foo
Justin Turner Arthur
Expand Down
17 changes: 14 additions & 3 deletions aiohttp/http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,12 +667,23 @@ def feed_data(self,
# we should get another \r\n otherwise
# trailers needs to be skiped until \r\n\r\n
if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
if chunk[:2] == SEP:
head = chunk[:2]
if head == SEP:
# end of stream
self.payload.feed_eof()
return True, chunk[2:]
else:
self._chunk = ChunkState.PARSE_TRAILERS
# Both CR and LF, or only LF may not be received yet. It is
# expected that CRLF or LF will be shown at the very first
# byte next time, otherwise trailers should come. The last
# CRLF which marks the end of response might not be
# contained in the same TCP segment which delivered the
# size indicator.
if not head:
return False, b''
if head == SEP[:1]:
self._chunk_tail = head
return False, b''
self._chunk = ChunkState.PARSE_TRAILERS

# read and discard trailer up to the CRLF terminator
if self._chunk == ChunkState.PARSE_TRAILERS:
Expand Down
61 changes: 61 additions & 0 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,67 @@ async def test_parse_chunked_payload_size_error(self, stream) -> None:
assert isinstance(out.exception(),
http_exceptions.TransferEncodingError)

async def test_parse_chunked_payload_split_end(self, protocol) -> None:
out = aiohttp.StreamReader(protocol, loop=None)
p = HttpPayloadParser(out, chunked=True)
p.feed_data(b'4\r\nasdf\r\n0\r\n')
p.feed_data(b'\r\n')

assert out.is_eof()
assert b'asdf' == b''.join(out._buffer)

async def test_parse_chunked_payload_split_end2(self, protocol) -> None:
out = aiohttp.StreamReader(protocol, loop=None)
p = HttpPayloadParser(out, chunked=True)
p.feed_data(b'4\r\nasdf\r\n0\r\n\r')
p.feed_data(b'\n')

assert out.is_eof()
assert b'asdf' == b''.join(out._buffer)

async def test_parse_chunked_payload_split_end_trailers(self,
protocol) -> None:
out = aiohttp.StreamReader(protocol, loop=None)
p = HttpPayloadParser(out, chunked=True)
p.feed_data(b'4\r\nasdf\r\n0\r\n')
p.feed_data(b'Content-MD5: 912ec803b2ce49e4a541068d495ab570\r\n')
p.feed_data(b'\r\n')

assert out.is_eof()
assert b'asdf' == b''.join(out._buffer)

async def test_parse_chunked_payload_split_end_trailers2(self,
protocol) -> None:
out = aiohttp.StreamReader(protocol, loop=None)
p = HttpPayloadParser(out, chunked=True)
p.feed_data(b'4\r\nasdf\r\n0\r\n')
p.feed_data(b'Content-MD5: 912ec803b2ce49e4a541068d495ab570\r\n\r')
p.feed_data(b'\n')

assert out.is_eof()
assert b'asdf' == b''.join(out._buffer)

async def test_parse_chunked_payload_split_end_trailers3(self,
protocol) -> None:
out = aiohttp.StreamReader(protocol, loop=None)
p = HttpPayloadParser(out, chunked=True)
p.feed_data(b'4\r\nasdf\r\n0\r\nContent-MD5: ')
p.feed_data(b'912ec803b2ce49e4a541068d495ab570\r\n\r\n')

assert out.is_eof()
assert b'asdf' == b''.join(out._buffer)

async def test_parse_chunked_payload_split_end_trailers4(self,
protocol) -> None:
out = aiohttp.StreamReader(protocol, loop=None)
p = HttpPayloadParser(out, chunked=True)
p.feed_data(b'4\r\nasdf\r\n0\r\n'
b'C')
p.feed_data(b'ontent-MD5: 912ec803b2ce49e4a541068d495ab570\r\n\r\n')

assert out.is_eof()
assert b'asdf' == b''.join(out._buffer)

async def test_http_payload_parser_length(self, stream) -> None:
out = aiohttp.FlowControlDataQueue(stream,
loop=asyncio.get_event_loop())
Expand Down

0 comments on commit 3454184

Please sign in to comment.