6060
6161ASCIISET : Final [Set [str ]] = set (string .printable )
6262
63- # See https://tools.ietf .org/html/rfc7230#section-3.1.1
64- # and https://tools.ietf .org/html/rfc7230#appendix-B
63+ # See https://www.rfc-editor .org/rfc/rfc9110.html#name-overview
64+ # and https://www.rfc-editor .org/rfc/rfc9110.html#name-tokens
6565#
6666# method = token
6767# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
6868# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
6969# token = 1*tchar
7070METHRE : Final [Pattern [str ]] = re .compile (r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+" )
71- VERSRE : Final [Pattern [str ]] = re .compile (r"HTTP/(\d+ ).(\d+ )" )
72- HDRRE : Final [Pattern [bytes ]] = re .compile (rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\" ]" )
71+ VERSRE : Final [Pattern [str ]] = re .compile (r"HTTP/(\d).(\d)" )
72+ HDRRE : Final [Pattern [bytes ]] = re .compile (rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\ ]" )
7373
7474
7575class RawRequestMessage (NamedTuple ):
@@ -148,8 +148,11 @@ def parse_headers(
148148 except ValueError :
149149 raise InvalidHeader (line ) from None
150150
151- bname = bname .strip (b" \t " )
152- bvalue = bvalue .lstrip ()
151+ # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
152+ if {bname [0 ], bname [- 1 ]} & {32 , 9 }: # {" ", "\t"}
153+ raise InvalidHeader (line )
154+
155+ bvalue = bvalue .lstrip (b" \t " )
153156 if HDRRE .search (bname ):
154157 raise InvalidHeader (bname )
155158 if len (bname ) > self .max_field_size :
@@ -170,6 +173,7 @@ def parse_headers(
170173 # consume continuation lines
171174 continuation = line and line [0 ] in (32 , 9 ) # (' ', '\t')
172175
176+ # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
173177 if continuation :
174178 bvalue_lst = [bvalue ]
175179 while continuation :
@@ -204,10 +208,14 @@ def parse_headers(
204208 str (header_length ),
205209 )
206210
207- bvalue = bvalue .strip ()
211+ bvalue = bvalue .strip (b" \t " )
208212 name = bname .decode ("utf-8" , "surrogateescape" )
209213 value = bvalue .decode ("utf-8" , "surrogateescape" )
210214
215+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
216+ if "\n " in value or "\r " in value or "\x00 " in value :
217+ raise InvalidHeader (bvalue )
218+
211219 headers .add (name , value )
212220 raw_headers .append ((bname , bvalue ))
213221
@@ -322,15 +330,12 @@ def get_content_length() -> Optional[int]:
322330 if length_hdr is None :
323331 return None
324332
325- try :
326- length = int ( length_hdr )
327- except ValueError :
333+ # Shouldn't allow +/- or other number formats.
334+ # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
335+ if not length_hdr . strip ( " \t " ). isdigit () :
328336 raise InvalidHeader (CONTENT_LENGTH )
329337
330- if length < 0 :
331- raise InvalidHeader (CONTENT_LENGTH )
332-
333- return length
338+ return int (length_hdr )
334339
335340 length = get_content_length ()
336341 # do not support old websocket spec
@@ -470,6 +475,24 @@ def parse_headers(
470475 upgrade = False
471476 chunked = False
472477
478+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
479+ # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
480+ singletons = (
481+ hdrs .CONTENT_LENGTH ,
482+ hdrs .CONTENT_LOCATION ,
483+ hdrs .CONTENT_RANGE ,
484+ hdrs .CONTENT_TYPE ,
485+ hdrs .ETAG ,
486+ hdrs .HOST ,
487+ hdrs .MAX_FORWARDS ,
488+ hdrs .SERVER ,
489+ hdrs .TRANSFER_ENCODING ,
490+ hdrs .USER_AGENT ,
491+ )
492+ bad_hdr = next ((h for h in singletons if len (headers .getall (h , ())) > 1 ), None )
493+ if bad_hdr is not None :
494+ raise BadHttpMessage (f"Duplicate '{ bad_hdr } ' header found." )
495+
473496 # keep-alive
474497 conn = headers .get (hdrs .CONNECTION )
475498 if conn :
@@ -523,7 +546,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
523546 # request line
524547 line = lines [0 ].decode ("utf-8" , "surrogateescape" )
525548 try :
526- method , path , version = line .split (None , 2 )
549+ method , path , version = line .split (maxsplit = 2 )
527550 except ValueError :
528551 raise BadStatusLine (line ) from None
529552
@@ -537,14 +560,10 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
537560 raise BadStatusLine (method )
538561
539562 # version
540- try :
541- if version .startswith ("HTTP/" ):
542- n1 , n2 = version [5 :].split ("." , 1 )
543- version_o = HttpVersion (int (n1 ), int (n2 ))
544- else :
545- raise BadStatusLine (version )
546- except Exception :
547- raise BadStatusLine (version )
563+ match = VERSRE .match (version )
564+ if match is None :
565+ raise BadStatusLine (line )
566+ version_o = HttpVersion (int (match .group (1 )), int (match .group (2 )))
548567
549568 if method == "CONNECT" :
550569 # authority-form,
@@ -611,12 +630,12 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
611630 def parse_message (self , lines : List [bytes ]) -> RawResponseMessage :
612631 line = lines [0 ].decode ("utf-8" , "surrogateescape" )
613632 try :
614- version , status = line .split (None , 1 )
633+ version , status = line .split (maxsplit = 1 )
615634 except ValueError :
616635 raise BadStatusLine (line ) from None
617636
618637 try :
619- status , reason = status .split (None , 1 )
638+ status , reason = status .split (maxsplit = 1 )
620639 except ValueError :
621640 reason = ""
622641
@@ -632,13 +651,9 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
632651 version_o = HttpVersion (int (match .group (1 )), int (match .group (2 )))
633652
634653 # The status code is a three-digit number
635- try :
636- status_i = int (status )
637- except ValueError :
638- raise BadStatusLine (line ) from None
639-
640- if status_i > 999 :
654+ if len (status ) != 3 or not status .isdigit ():
641655 raise BadStatusLine (line )
656+ status_i = int (status )
642657
643658 # read headers
644659 (
@@ -773,14 +788,13 @@ def feed_data(
773788 else :
774789 size_b = chunk [:pos ]
775790
776- try :
777- size = int (bytes (size_b ), 16 )
778- except ValueError :
791+ if not size_b .isdigit ():
779792 exc = TransferEncodingError (
780793 chunk [:pos ].decode ("ascii" , "surrogateescape" )
781794 )
782795 self .payload .set_exception (exc )
783- raise exc from None
796+ raise exc
797+ size = int (bytes (size_b ), 16 )
784798
785799 chunk = chunk [pos + 2 :]
786800 if size == 0 : # eof marker
0 commit comments