Skip to content

Commit

Permalink
Skip the preamble in MultipartReader.
Browse files Browse the repository at this point in the history
This updates the MultipartReader to ignore the preamble of a multipart
message.  To quote RFC 2046 (section 5.1.1):

> There appears to be room for additional information prior to the first
> boundary delimiter line and following the final boundary delimiter line.
> These areas should generally be left blank, and implementations must
> ignore anything that appears before the first boundary delimiter line or
> after the last one.

To do this, the MultipartReader now behaves slightly differently at the
beginning of the file.  Instead of expecting a boundary immediately and
raising an exception if it doesn't find one, it skips over the initial
data until it reaches the first boundary and starts reading from there.

If it doesn't find any boundary, it will raise an exception, much as it
did before.

Fixes #880
  • Loading branch information
obmarg committed May 19, 2016
1 parent 15cdce8 commit c232c23
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 1 deletion.
22 changes: 21 additions & 1 deletion aiohttp/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,7 @@ def __init__(self, headers, content):
self._content = content
self._last_part = None
self._at_eof = False
self._at_bof = True
self._unread = []

@asyncio.coroutine
Expand Down Expand Up @@ -544,10 +545,15 @@ def at_eof(self):
@asyncio.coroutine
def next(self):
"""Emits the next multipart body part."""
# So, if we're at BOF, we need to skip till the boundary.
if self._at_eof:
return
yield from self._maybe_release_last_part()
yield from self._read_boundary()
if self._at_bof:
yield from self._read_until_first_boundary()
self._at_bof = False
else:
yield from self._read_boundary()
if self._at_eof: # we just read the last boundary, nothing to do there
return
self._last_part = yield from self.fetch_next_part()
Expand Down Expand Up @@ -605,6 +611,20 @@ def _readline(self):
return self._unread.pop()
return (yield from self._content.readline())

@asyncio.coroutine
def _read_until_first_boundary(self):
    """Skips the preamble by consuming lines up to the first boundary.

    Lines are pulled from ``self._readline()`` and dropped until one,
    with trailing whitespace stripped, equals ``self._boundary`` or
    ``self._boundary + b'--'``.  In the latter case ``self._at_eof`` is
    set to ``True`` before returning.

    Raises ``ValueError`` if ``self._readline()`` returns ``b''`` before
    either form of the boundary has been seen.
    """
    line = yield from self._readline()
    while line != b'':
        candidate = line.rstrip()
        if candidate == self._boundary:
            # Opening boundary located; the caller can read a part next.
            return
        if candidate == self._boundary + b'--':
            # The terminating form showed up first, so flag EOF.
            self._at_eof = True
            return
        line = yield from self._readline()
    # An empty read means the input ran out without any boundary line.
    raise ValueError("Could not find starting boundary %r"
                     % (self._boundary))

@asyncio.coroutine
def _read_boundary(self):
chunk = (yield from self._readline()).rstrip()
Expand Down
18 changes: 18 additions & 0 deletions tests/test_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,24 @@ def test_read_chunk_from_stream_doesnt_breaks_reader(self):
body_parts.append(read_part)
self.assertListEqual(body_parts, [b'chunk', b'two_chunks'])

def test_reading_skips_prelude(self):
    # Text placed ahead of the first '--:' line must be ignored by the
    # reader, so the first emitted item is already a body part.
    headers = {CONTENT_TYPE: 'multipart/related;boundary=":"'}
    payload = (b'Multi-part data is not supported.\r\n'
               b'\r\n'
               b'--:\r\n'
               b'\r\n'
               b'test\r\n'
               b'--:\r\n'
               b'\r\n'
               b'passed\r\n'
               b'--:--')
    multipart = aiohttp.multipart
    parts_reader = multipart.MultipartReader(headers, Stream(payload))

    leading_part = yield from parts_reader.next()
    self.assertIsInstance(leading_part, multipart.BodyPartReader)

    # The EOF checks run only after the second next() call, as before.
    following_part = yield from parts_reader.next()
    self.assertTrue(leading_part.at_eof())
    self.assertFalse(following_part.at_eof())


class BodyPartWriterTestCase(unittest.TestCase):

Expand Down

0 comments on commit c232c23

Please sign in to comment.