Skip to content

Commit 9e4bbed

Browse files
mhils authored and Kriechi committed
merge surrounding whitespace and uppercase validators into illegal character validation
1 parent 035e989 commit 9e4bbed

File tree

1 file changed

+25
-55
lines changed

1 file changed

+25
-55
lines changed

src/h2/utilities.py

Lines changed: 25 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
from __future__ import annotations
88

99
import collections
10-
import re
11-
from string import whitespace
1210
from typing import TYPE_CHECKING, Any, NamedTuple
1311

1412
from hpack.struct import HeaderTuple, NeverIndexedHeaderTuple
@@ -20,7 +18,6 @@
2018

2119
from hpack.struct import Header, HeaderWeaklyTyped
2220

23-
UPPER_RE = re.compile(b"[A-Z]")
2421
SIGIL = ord(b":")
2522
INFORMATIONAL_START = ord(b"1")
2623

@@ -70,9 +67,6 @@
7067
_CONNECT_REQUEST_ONLY_HEADERS = frozenset([b":protocol"])
7168

7269

73-
_WHITESPACE = frozenset(map(ord, whitespace))
74-
75-
7670
def _secure_headers(headers: Iterable[Header],
7771
hdr_validation_flags: HeaderValidationFlags | None) -> Generator[Header, None, None]:
7872
"""
@@ -207,12 +201,6 @@ def validate_headers(headers: Iterable[Header], hdr_validation_flags: HeaderVali
207201
headers = _reject_empty_header_names(
208202
headers, hdr_validation_flags,
209203
)
210-
headers = _reject_uppercase_header_fields(
211-
headers, hdr_validation_flags,
212-
)
213-
headers = _reject_surrounding_whitespace(
214-
headers, hdr_validation_flags,
215-
)
216204
headers = _reject_te(
217205
headers, hdr_validation_flags,
218206
)
@@ -232,13 +220,16 @@ def _reject_illegal_characters(headers: Iterable[Header],
232220
hdr_validation_flags: HeaderValidationFlags) -> Generator[Header, None, None]:
233221
"""
234222
Raises a ProtocolError if any header names or values contain illegal characters.
235-
See RFC 9113, section 8.2.1.
223+
See <https://www.rfc-editor.org/rfc/rfc9113.html#section-8.2.1>.
236224
"""
237225
for header in headers:
238226
# > A field name MUST NOT contain characters in the ranges 0x00-0x20, 0x41-0x5a,
239227
# > or 0x7f-0xff (all ranges inclusive).
240228
for c in header[0]:
241-
if c <= 0x20 or 0x41 <= c <= 0x5a or 0x7f <= c:
229+
if 0x41 <= c <= 0x5a:
230+
msg = f"Received uppercase header name {header[0]!r}."
231+
raise ProtocolError(msg)
232+
if c <= 0x20 or c >= 0x7f:
242233
msg = f"Illegal character '{chr(c)}' in header name: {header[0]!r}"
243234
raise ProtocolError(msg)
244235

@@ -249,14 +240,28 @@ def _reject_illegal_characters(headers: Iterable[Header],
249240
msg = f"Illegal character ':' in header name: {header[0]!r}"
250241
raise ProtocolError(msg)
251242

252-
# > A field value MUST NOT contain the zero value (ASCII NUL, 0x00), line feed
253-
# > (ASCII LF, 0x0a), or carriage return (ASCII CR, 0x0d) at any position.
254-
for c in header[1]:
255-
if c == 0 or c == 0x0a or c == 0x0d:
256-
msg = f"Illegal character '{chr(c)}' in header value: {header[1]!r}"
243+
# For compatibility with RFC 7230 header fields, we need to allow the field
244+
# value to be an empty string. This is ludicrous, but technically allowed.
245+
if field_value := header[1]:
246+
247+
# > A field value MUST NOT contain the zero value (ASCII NUL, 0x00), line feed
248+
# > (ASCII LF, 0x0a), or carriage return (ASCII CR, 0x0d) at any position.
249+
for c in field_value:
250+
if c == 0 or c == 0x0a or c == 0x0d: # noqa: PLR1714
251+
msg = f"Illegal character '{chr(c)}' in header value: {field_value!r}"
252+
raise ProtocolError(msg)
253+
254+
# > A field value MUST NOT start or end with an ASCII whitespace character
255+
# > (ASCII SP or HTAB, 0x20 or 0x09).
256+
if (
257+
field_value[0] == 0x20 or
258+
field_value[0] == 0x09 or
259+
field_value[-1] == 0x20 or
260+
field_value[-1] == 0x09
261+
):
262+
msg = f"Received header value surrounded by whitespace {field_value!r}"
257263
raise ProtocolError(msg)
258264

259-
# Surrounding whitespace is enforced in `_reject_surrounding_whitespace`.
260265
yield header
261266

262267

@@ -275,41 +280,6 @@ def _reject_empty_header_names(headers: Iterable[Header],
275280
yield header
276281

277282

278-
def _reject_uppercase_header_fields(headers: Iterable[Header],
279-
hdr_validation_flags: HeaderValidationFlags) -> Generator[Header, None, None]:
280-
"""
281-
Raises a ProtocolError if any uppercase character is found in a header
282-
block.
283-
"""
284-
for header in headers:
285-
if UPPER_RE.search(header[0]):
286-
msg = f"Received uppercase header name {header[0]!r}."
287-
raise ProtocolError(msg)
288-
yield header
289-
290-
291-
def _reject_surrounding_whitespace(headers: Iterable[Header],
292-
hdr_validation_flags: HeaderValidationFlags) -> Generator[Header, None, None]:
293-
"""
294-
Raises a ProtocolError if any header name or value is surrounded by
295-
whitespace characters.
296-
"""
297-
# For compatibility with RFC 7230 header fields, we need to allow the field
298-
# value to be an empty string. This is ludicrous, but technically allowed.
299-
# The field name may not be empty, though, so we can safely assume that it
300-
# must have at least one character in it and throw exceptions if it
301-
# doesn't.
302-
for header in headers:
303-
if header[0][0] in _WHITESPACE or header[0][-1] in _WHITESPACE:
304-
msg = f"Received header name surrounded by whitespace {header[0]!r}"
305-
raise ProtocolError(msg)
306-
if header[1] and ((header[1][0] in _WHITESPACE) or
307-
(header[1][-1] in _WHITESPACE)):
308-
msg = f"Received header value surrounded by whitespace {header[1]!r}"
309-
raise ProtocolError(msg)
310-
yield header
311-
312-
313283
def _reject_te(headers: Iterable[Header], hdr_validation_flags: HeaderValidationFlags) -> Generator[Header, None, None]:
314284
"""
315285
Raises a ProtocolError if the TE header is present in a header block and

0 commit comments

Comments
 (0)