Skip to content

Commit b39b892

Browse files
authored
Merge pull request #3497 from bdarnell/multipart-log-spam
httputil: Raise errors instead of logging in multipart/form-data parsing
2 parents ae4a4e4 + cc61050 commit b39b892

File tree

4 files changed

+34
-30
lines changed

4 files changed

+34
-30
lines changed

tornado/httputil.py

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
3535

3636
from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
37-
from tornado.log import gen_log
3837
from tornado.util import ObjectDict, unicode_type
3938

4039

@@ -884,25 +883,22 @@ def parse_body_arguments(
884883
"""
885884
if content_type.startswith("application/x-www-form-urlencoded"):
886885
if headers and "Content-Encoding" in headers:
887-
gen_log.warning(
888-
"Unsupported Content-Encoding: %s", headers["Content-Encoding"]
886+
raise HTTPInputError(
887+
"Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
889888
)
890-
return
891889
try:
892890
# real charset decoding will happen in RequestHandler.decode_argument()
893891
uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
894892
except Exception as e:
895-
gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
896-
uri_arguments = {}
893+
raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
897894
for name, values in uri_arguments.items():
898895
if values:
899896
arguments.setdefault(name, []).extend(values)
900897
elif content_type.startswith("multipart/form-data"):
901898
if headers and "Content-Encoding" in headers:
902-
gen_log.warning(
903-
"Unsupported Content-Encoding: %s", headers["Content-Encoding"]
899+
raise HTTPInputError(
900+
"Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
904901
)
905-
return
906902
try:
907903
fields = content_type.split(";")
908904
for field in fields:
@@ -911,9 +907,9 @@ def parse_body_arguments(
911907
parse_multipart_form_data(utf8(v), body, arguments, files)
912908
break
913909
else:
914-
raise ValueError("multipart boundary not found")
910+
raise HTTPInputError("multipart boundary not found")
915911
except Exception as e:
916-
gen_log.warning("Invalid multipart/form-data: %s", e)
912+
raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
917913

918914

919915
def parse_multipart_form_data(
@@ -942,26 +938,22 @@ def parse_multipart_form_data(
942938
boundary = boundary[1:-1]
943939
final_boundary_index = data.rfind(b"--" + boundary + b"--")
944940
if final_boundary_index == -1:
945-
gen_log.warning("Invalid multipart/form-data: no final boundary")
946-
return
941+
raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
947942
parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
948943
for part in parts:
949944
if not part:
950945
continue
951946
eoh = part.find(b"\r\n\r\n")
952947
if eoh == -1:
953-
gen_log.warning("multipart/form-data missing headers")
954-
continue
948+
raise HTTPInputError("multipart/form-data missing headers")
955949
headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
956950
disp_header = headers.get("Content-Disposition", "")
957951
disposition, disp_params = _parse_header(disp_header)
958952
if disposition != "form-data" or not part.endswith(b"\r\n"):
959-
gen_log.warning("Invalid multipart/form-data")
960-
continue
953+
raise HTTPInputError("Invalid multipart/form-data")
961954
value = part[eoh + 4 : -2]
962955
if not disp_params.get("name"):
963-
gen_log.warning("multipart/form-data value missing name")
964-
continue
956+
raise HTTPInputError("multipart/form-data missing name")
965957
name = disp_params["name"]
966958
if disp_params.get("filename"):
967959
ctype = headers.get("Content-Type", "application/unknown")

tornado/test/httpserver_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,9 +1148,9 @@ def test_gzip_unsupported(self):
11481148
# Gzip support is opt-in; without it the server fails to parse
11491149
# the body (but parsing form bodies is currently just a log message,
11501150
# not a fatal error).
1151-
with ExpectLog(gen_log, "Unsupported Content-Encoding"):
1151+
with ExpectLog(gen_log, ".*Unsupported Content-Encoding"):
11521152
response = self.post_gzip("foo=bar")
1153-
self.assertEqual(json_decode(response.body), {})
1153+
self.assertEqual(response.code, 400)
11541154

11551155

11561156
class StreamingChunkSizeTest(AsyncHTTPTestCase):

tornado/test/httputil_test.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
)
1313
from tornado.escape import utf8, native_str
1414
from tornado.log import gen_log
15-
from tornado.testing import ExpectLog
1615
from tornado.test.util import ignore_deprecation
1716

1817
import copy
@@ -195,7 +194,9 @@ def test_missing_headers(self):
195194
b"\n", b"\r\n"
196195
)
197196
args, files = form_data_args()
198-
with ExpectLog(gen_log, "multipart/form-data missing headers"):
197+
with self.assertRaises(
198+
HTTPInputError, msg="multipart/form-data missing headers"
199+
):
199200
parse_multipart_form_data(b"1234", data, args, files)
200201
self.assertEqual(files, {})
201202

@@ -209,7 +210,7 @@ def test_invalid_content_disposition(self):
209210
b"\n", b"\r\n"
210211
)
211212
args, files = form_data_args()
212-
with ExpectLog(gen_log, "Invalid multipart/form-data"):
213+
with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
213214
parse_multipart_form_data(b"1234", data, args, files)
214215
self.assertEqual(files, {})
215216

@@ -222,7 +223,7 @@ def test_line_does_not_end_with_correct_line_break(self):
222223
b"\n", b"\r\n"
223224
)
224225
args, files = form_data_args()
225-
with ExpectLog(gen_log, "Invalid multipart/form-data"):
226+
with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
226227
parse_multipart_form_data(b"1234", data, args, files)
227228
self.assertEqual(files, {})
228229

@@ -236,7 +237,9 @@ def test_content_disposition_header_without_name_parameter(self):
236237
b"\n", b"\r\n"
237238
)
238239
args, files = form_data_args()
239-
with ExpectLog(gen_log, "multipart/form-data value missing name"):
240+
with self.assertRaises(
241+
HTTPInputError, msg="multipart/form-data value missing name"
242+
):
240243
parse_multipart_form_data(b"1234", data, args, files)
241244
self.assertEqual(files, {})
242245

tornado/web.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,6 +1801,14 @@ async def _execute(
18011801
try:
18021802
if self.request.method not in self.SUPPORTED_METHODS:
18031803
raise HTTPError(405)
1804+
1805+
# If we're not in stream_request_body mode, this is the place where we parse the body.
1806+
if not _has_stream_request_body(self.__class__):
1807+
try:
1808+
self.request._parse_body()
1809+
except httputil.HTTPInputError as e:
1810+
raise HTTPError(400, "Invalid body: %s" % e) from e
1811+
18041812
self.path_args = [self.decode_argument(arg) for arg in args]
18051813
self.path_kwargs = {
18061814
k: self.decode_argument(v, name=k) for (k, v) in kwargs.items()
@@ -1992,7 +2000,7 @@ def _has_stream_request_body(cls: Type[RequestHandler]) -> bool:
19922000

19932001

19942002
def removeslash(
1995-
method: Callable[..., Optional[Awaitable[None]]]
2003+
method: Callable[..., Optional[Awaitable[None]]],
19962004
) -> Callable[..., Optional[Awaitable[None]]]:
19972005
"""Use this decorator to remove trailing slashes from the request path.
19982006
@@ -2021,7 +2029,7 @@ def wrapper( # type: ignore
20212029

20222030

20232031
def addslash(
2024-
method: Callable[..., Optional[Awaitable[None]]]
2032+
method: Callable[..., Optional[Awaitable[None]]],
20252033
) -> Callable[..., Optional[Awaitable[None]]]:
20262034
"""Use this decorator to add a missing trailing slash to the request path.
20272035
@@ -2445,8 +2453,9 @@ def finish(self) -> None:
24452453
if self.stream_request_body:
24462454
future_set_result_unless_cancelled(self.request._body_future, None)
24472455
else:
2456+
# Note that the body gets parsed in RequestHandler._execute so it can be in
2457+
# the right exception handler scope.
24482458
self.request.body = b"".join(self.chunks)
2449-
self.request._parse_body()
24502459
self.execute()
24512460

24522461
def on_connection_close(self) -> None:
@@ -3332,7 +3341,7 @@ def transform_chunk(self, chunk: bytes, finishing: bool) -> bytes:
33323341

33333342

33343343
def authenticated(
3335-
method: Callable[..., Optional[Awaitable[None]]]
3344+
method: Callable[..., Optional[Awaitable[None]]],
33363345
) -> Callable[..., Optional[Awaitable[None]]]:
33373346
"""Decorate methods with this to require that the user be logged in.
33383347

0 commit comments

Comments
 (0)