Skip to content

Commit 5f977e0

Browse files
bpo-39040: Fix parsing of email mime headers with whitespace between encoded-words. (gh-17620)
* bpo-39040: Fix parsing of email headers with encoded-words inside a quoted string. It is fairly common to find malformed mime headers (especially content-disposition headers) where the parameter values, instead of being encoded to RFC standards, are "encoded" by doing RFC 2047 "encoded word" encoding, and then enclosing the whole thing in quotes. The processing of these malformed headers was incorrectly leaving the spaces between encoded words in the decoded text (whitespace between adjacent encoded words is supposed to be stripped on decoding). This changeset fixes the encoded word processing inside quoted strings (bare-quoted-string) to do correct RFC 2047 decoding by stripping that whitespace. (cherry picked from commit 21017ed) Co-authored-by: Abhilash Raj <maxking@users.noreply.github.com>
1 parent f5bd99b commit 5f977e0

File tree

3 files changed

+30
-0
lines changed

3 files changed

+30
-0
lines changed

Lib/email/_header_value_parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,12 +1211,21 @@ def get_bare_quoted_string(value):
12111211
if value[0] in WSP:
12121212
token, value = get_fws(value)
12131213
elif value[:2] == '=?':
1214+
valid_ew = False
12141215
try:
12151216
token, value = get_encoded_word(value)
12161217
bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
12171218
"encoded word inside quoted string"))
1219+
valid_ew = True
12181220
except errors.HeaderParseError:
12191221
token, value = get_qcontent(value)
1222+
# Collapse the whitespace between two encoded words that occur in a
1223+
# bare-quoted-string.
1224+
if valid_ew and len(bare_quoted_string) > 1:
1225+
if (bare_quoted_string[-1].token_type == 'fws' and
1226+
bare_quoted_string[-2].token_type == 'encoded-word'):
1227+
bare_quoted_string[-1] = EWWhiteSpaceTerminal(
1228+
bare_quoted_string[-1], 'fws')
12201229
else:
12211230
token, value = get_qcontent(value)
12221231
bare_quoted_string.append(token)

Lib/test/test_email/test_headerregistry.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,25 @@ def content_disp_as_value(self,
872872
{'filename': 'foo'},
873873
[errors.InvalidHeaderDefect]),
874874

875+
'invalid_parameter_value_with_fws_between_ew': (
876+
'attachment; filename="=?UTF-8?Q?Schulbesuchsbest=C3=A4ttigung=2E?='
877+
' =?UTF-8?Q?pdf?="',
878+
'attachment',
879+
{'filename': 'Schulbesuchsbestättigung.pdf'},
880+
[errors.InvalidHeaderDefect]*3,
881+
('attachment; filename="Schulbesuchsbestättigung.pdf"'),
882+
('Content-Disposition: attachment;\n'
883+
' filename*=utf-8\'\'Schulbesuchsbest%C3%A4ttigung.pdf\n'),
884+
),
885+
886+
'parameter_value_with_fws_between_tokens': (
887+
'attachment; filename="File =?utf-8?q?Name?= With Spaces.pdf"',
888+
'attachment',
889+
{'filename': 'File Name With Spaces.pdf'},
890+
[errors.InvalidHeaderDefect],
891+
'attachment; filename="File Name With Spaces.pdf"',
892+
('Content-Disposition: attachment; filename="File Name With Spaces.pdf"\n'),
893+
)
875894
}
876895

877896

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix parsing of invalid MIME header parameters by collapsing whitespace between
2+
encoded words in a bare-quoted-string.

0 commit comments

Comments
 (0)