Skip to content

Commit 2e46f24

Browse files
Validate HTTP header-field more completely
This was brought about by certain whitespace characters being allowed that are not allowed in the HTTP standard. Waitress would dutifully strip those whitespace characters and continue on as if nothing mattered, however whitespace in HTTP messages does matter and could allow for HTTP request smuggling if the front-end proxy server does not agree with the back-end server on how to parse a HTTP message. This disallows things like this: Content-Length: 10 Transfer-Encoding:[0x0b]chunked Which would get parsed by a front-end server as a request with Content-Length 10, and an invalid Transfer-Encoding header, but would get parsed as a chunked request by Waitress.
1 parent 2a11d68 commit 2e46f24

File tree

2 files changed

+109
-22
lines changed

2 files changed

+109
-22
lines changed

waitress/parser.py

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
ServerNotImplemented,
3030
find_double_newline,
3131
)
32+
from .rfc7230 import HEADER_FIELD
3233

3334

3435
class ParsingError(Exception):
@@ -38,7 +39,6 @@ class ParsingError(Exception):
3839
class TransferEncodingNotImplemented(Exception):
3940
pass
4041

41-
4242
class HTTPRequestParser(object):
4343
"""A structure that collects the HTTP request.
4444
@@ -208,26 +208,27 @@ def parse_header(self, header_plus):
208208

209209
headers = self.headers
210210
for line in lines:
211-
index = line.find(b":")
212-
if index > 0:
213-
key = line[:index]
214-
215-
if key != key.strip():
216-
raise ParsingError("Invalid whitespace after field-name")
217-
218-
if b"_" in key:
219-
continue
220-
value = line[index + 1 :].strip()
221-
key1 = tostr(key.upper().replace(b"-", b"_"))
222-
# If a header already exists, we append subsequent values
223-
# seperated by a comma. Applications already need to handle
224-
# the comma seperated values, as HTTP front ends might do
225-
# the concatenation for you (behavior specified in RFC2616).
226-
try:
227-
headers[key1] += tostr(b", " + value)
228-
except KeyError:
229-
headers[key1] = tostr(value)
230-
# else there's garbage in the headers?
211+
header = HEADER_FIELD.match(line)
212+
213+
if not header:
214+
raise ParsingError("Invalid header")
215+
216+
key, value = header.group('name', 'value')
217+
218+
if b"_" in key:
219+
# TODO(xistence): Should we drop this request instead?
220+
continue
221+
222+
value = value.strip()
223+
key1 = tostr(key.upper().replace(b"-", b"_"))
224+
# If a header already exists, we append subsequent values
225+
# seperated by a comma. Applications already need to handle
226+
# the comma seperated values, as HTTP front ends might do
227+
# the concatenation for you (behavior specified in RFC2616).
228+
try:
229+
headers[key1] += tostr(b", " + value)
230+
except KeyError:
231+
headers[key1] = tostr(value)
231232

232233
# command, uri, version will be bytes
233234
command, uri, version = crack_first_line(first_line)
@@ -352,6 +353,9 @@ def get_header_lines(header):
352353
r = []
353354
lines = header.split(b"\r\n")
354355
for line in lines:
356+
if not line:
357+
continue
358+
355359
if b"\r" in line or b"\n" in line:
356360
raise ParsingError('Bare CR or LF found in header line "%s"' % tostr(line))
357361

waitress/tests/test_parser.py

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,10 +308,93 @@ def test_parse_header_invalid_whitespace(self):
308308
try:
309309
self.parser.parse_header(data)
310310
except ParsingError as e:
311-
self.assertIn("Invalid whitespace after field-name", e.args[0])
311+
self.assertIn("Invalid header", e.args[0])
312312
else: # pragma: nocover
313313
self.assertTrue(False)
314314

315+
def test_parse_header_invalid_whitespace_vtab(self):
316+
from waitress.parser import ParsingError
317+
318+
data = b"GET /foobar HTTP/1.1\r\nfoo:\x0bbar\r\n"
319+
try:
320+
self.parser.parse_header(data)
321+
except ParsingError as e:
322+
self.assertIn("Invalid header", e.args[0])
323+
else: # pragma: nocover
324+
self.assertTrue(False)
325+
326+
def test_parse_header_invalid_no_colon(self):
327+
from waitress.parser import ParsingError
328+
329+
data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nnotvalid\r\n"
330+
try:
331+
self.parser.parse_header(data)
332+
except ParsingError as e:
333+
self.assertIn("Invalid header", e.args[0])
334+
else: # pragma: nocover
335+
self.assertTrue(False)
336+
337+
def test_parse_header_invalid_folding_spacing(self):
338+
from waitress.parser import ParsingError
339+
340+
data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\t\x0bbaz\r\n"
341+
try:
342+
self.parser.parse_header(data)
343+
except ParsingError as e:
344+
self.assertIn("Invalid header", e.args[0])
345+
else: # pragma: nocover
346+
self.assertTrue(False)
347+
348+
def test_parse_header_invalid_chars(self):
349+
from waitress.parser import ParsingError
350+
351+
data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\foo: \x0bbaz\r\n"
352+
try:
353+
self.parser.parse_header(data)
354+
except ParsingError as e:
355+
self.assertIn("Invalid header", e.args[0])
356+
else: # pragma: nocover
357+
self.assertTrue(False)
358+
359+
def test_parse_header_empty(self):
360+
from waitress.parser import ParsingError
361+
362+
data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nempty:\r\n"
363+
self.parser.parse_header(data)
364+
365+
self.assertIn("EMPTY", self.parser.headers)
366+
self.assertIn("FOO", self.parser.headers)
367+
self.assertEqual(self.parser.headers["EMPTY"], "")
368+
self.assertEqual(self.parser.headers["FOO"], "bar")
369+
370+
def test_parse_header_multiple_values(self):
371+
from waitress.parser import ParsingError
372+
373+
data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever, more, please, yes\r\n"
374+
self.parser.parse_header(data)
375+
376+
self.assertIn("FOO", self.parser.headers)
377+
self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes")
378+
379+
def test_parse_header_multiple_values_header_folded(self):
380+
from waitress.parser import ParsingError
381+
382+
data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever,\r\n more, please, yes\r\n"
383+
self.parser.parse_header(data)
384+
385+
self.assertIn("FOO", self.parser.headers)
386+
self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes")
387+
388+
def test_parse_header_multiple_values_header_folded_multiple(self):
389+
from waitress.parser import ParsingError
390+
391+
data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever,\r\n more\r\nfoo: please, yes\r\n"
392+
self.parser.parse_header(data)
393+
394+
self.assertIn("FOO", self.parser.headers)
395+
self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes")
396+
397+
315398

316399
class Test_split_uri(unittest.TestCase):
317400
def _callFUT(self, uri):

0 commit comments

Comments
 (0)