Skip to content

gh-105973: Ensure that http header parser returns RFC 7230 compliant header values #105994

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Lib/email/_policybase.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
message_factory -- the class to use to create new message objects.
If the value is None, the default is Message.

rstrip_whitespace -- If true, trailing whitespace will be removed from
header values. Default: False

"""

raise_on_defect = False
Expand All @@ -165,6 +168,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
max_line_length = 78
mangle_from_ = False
message_factory = None
rstrip_whitespace = False

def handle_defect(self, obj, defect):
"""Based on policy, either raise defect or call register_defect.
Expand Down Expand Up @@ -300,7 +304,10 @@ def header_source_parse(self, sourcelines):
"""
name, value = sourcelines[0].split(':', 1)
value = value.lstrip(' \t') + ''.join(sourcelines[1:])
return (name, value.rstrip('\r\n'))
# Should trailing whitespace be stripped from the value?
if not self.rstrip_whitespace:
return (name, value.rstrip('\r\n'))
return (name, value.rstrip('\r\n \t'))

def header_store_parse(self, name, value):
"""+
Expand Down
10 changes: 8 additions & 2 deletions Lib/email/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class EmailPolicy(Policy):
refold_source = 'long'
header_factory = HeaderRegistry()
content_manager = raw_data_manager
rstrip_whitespace = False

def __init__(self, **kw):
# Ensure that each new instance gets a unique header factory
Expand Down Expand Up @@ -126,7 +127,11 @@ def header_source_parse(self, sourcelines):
"""
name, value = sourcelines[0].split(':', 1)
value = value.lstrip(' \t') + ''.join(sourcelines[1:])
return (name, value.rstrip('\r\n'))

# Should trailing whitespace be stripped from the value?
if not self.rstrip_whitespace:
return (name, value.rstrip('\r\n'))
return (name, value.rstrip('\r\n \t'))

def header_store_parse(self, name, value):
"""+
Expand Down Expand Up @@ -216,9 +221,10 @@ def _fold(self, name, value, refold_binary=False):


default = EmailPolicy()
compat = Compat32()
# Make the default policy use the class default header_factory
del default.header_factory
strict = default.clone(raise_on_defect=True)
HTTP = compat.clone(linesep='\r\n', max_line_length=None, rstrip_whitespace=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)
SMTPUTF8 = SMTP.clone(utf8=True)
3 changes: 2 additions & 1 deletion Lib/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
import sys
import collections.abc
from urllib.parse import urlsplit
from email import policy

# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
Expand Down Expand Up @@ -233,7 +234,7 @@ def _parse_header_lines(header_lines, _class=HTTPMessage):

"""
hstring = b''.join(header_lines).decode('iso-8859-1')
return email.parser.Parser(_class=_class).parsestr(hstring)
return email.parser.Parser(_class=_class, policy=policy.HTTP).parsestr(hstring)

def parse_headers(fp, _class=HTTPMessage):
"""Parses only RFC2822 headers from a file pointer."""
Expand Down
8 changes: 6 additions & 2 deletions Lib/test/test_email/test_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class PolicyAPITests(unittest.TestCase):
'raise_on_defect': False,
'mangle_from_': True,
'message_factory': None,
'rstrip_whitespace': False,
}
# These default values are the ones set on email.policy.default.
# If any of these defaults change, the docs must be updated.
Expand All @@ -38,6 +39,7 @@ class PolicyAPITests(unittest.TestCase):
'content_manager': email.policy.EmailPolicy.content_manager,
'mangle_from_': False,
'message_factory': email.message.EmailMessage,
'rstrip_whitespace': False,
})

# For each policy under test, we give here what we expect the defaults to
Expand All @@ -52,9 +54,11 @@ class PolicyAPITests(unittest.TestCase):
email.policy.SMTPUTF8: make_defaults(policy_defaults,
{'linesep': '\r\n',
'utf8': True}),
email.policy.HTTP: make_defaults(policy_defaults,
email.policy.HTTP: make_defaults(compat32_defaults,
{'linesep': '\r\n',
'max_line_length': None}),
'max_line_length': None,
'rstrip_whitespace': True
}),
email.policy.strict: make_defaults(policy_defaults,
{'raise_on_defect': True}),
new_policy: make_defaults(policy_defaults, {}),
Expand Down
24 changes: 24 additions & 0 deletions Lib/test/test_httplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,30 @@ def test_headers_debuglevel(self):
self.assertEqual(lines[2], "header: Second: val1")
self.assertEqual(lines[3], "header: Second: val2")

def test_header_whitespace_removal(self):
request = b"Host: example.com\r\nContent-Length: 12 \r\nuser-agent: web browser \r\nset-cookie: foo 1234; \r\nset-cookie: bar 5432\r\n"
parsed = client.parse_headers(io.BytesIO(request))
set_cookie_headers = parsed.get_all("set-cookie")

# Check whether the trailing whitespace has been preserved
self.assertTrue(parsed.get('content-length') == "12",
'Header parsing did not strip trailing whitespace from header value')
# And whether leading has been preserved
self.assertTrue(parsed.get('host') == "example.com",
'Header parsing did not strip leading whitespace from header value')
# whitespace within header values should be preserved
self.assertTrue(parsed.get('user-agent') == "web browser",
'Header parsing stripped whitespace from middle of header value')
self.assertEqual(len(set_cookie_headers), 2)


def test_empty_header_handling(self):
# Ensure that and empty header string does not trigger exceptions
# and do not result in the return of headers
parsed = client.parse_headers(io.BytesIO(b""))
self.assertTrue(not parsed,
'Empty headers still resulted in headers being returned')


class HttpMethodTests(TestCase):
def test_invalid_method_names(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Ensure that :func:`http.client.parse_headers` uses the HTTP policy which now
returns RFC 7230 compliant header values. Patch by Ben Tasker.