Skip to content

Commit a6ae02d

Browse files
bpo-39040: Fix parsing of email MIME headers with whitespace between encoded-words. (gh-17620)
* bpo-39040: Fix parsing of email headers with encoded-words inside a quoted string. It is fairly common to find malformed MIME headers (especially Content-Disposition headers) where the parameter values, instead of being encoded to RFC standards, are "encoded" by doing RFC 2047 "encoded word" encoding, and then enclosing the whole thing in quotes. The processing of these malformed headers was incorrectly leaving the spaces between encoded words in the decoded text (whitespace between adjacent encoded words is supposed to be stripped on decoding). This changeset fixes the encoded word processing inside quoted strings (bare-quoted-string) to do correct RFC 2047 decoding by stripping that whitespace. (cherry picked from commit 21017ed) Co-authored-by: Abhilash Raj <maxking@users.noreply.github.com>
1 parent 8e5f11d commit a6ae02d

File tree

3 files changed

+30
-0
lines changed

3 files changed

+30
-0
lines changed

Lib/email/_header_value_parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,12 +1218,21 @@ def get_bare_quoted_string(value):
12181218
if value[0] in WSP:
12191219
token, value = get_fws(value)
12201220
elif value[:2] == '=?':
1221+
valid_ew = False
12211222
try:
12221223
token, value = get_encoded_word(value)
12231224
bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
12241225
"encoded word inside quoted string"))
1226+
valid_ew = True
12251227
except errors.HeaderParseError:
12261228
token, value = get_qcontent(value)
1229+
# Collapse the whitespace between two encoded words that occur in a
1230+
# bare-quoted-string.
1231+
if valid_ew and len(bare_quoted_string) > 1:
1232+
if (bare_quoted_string[-1].token_type == 'fws' and
1233+
bare_quoted_string[-2].token_type == 'encoded-word'):
1234+
bare_quoted_string[-1] = EWWhiteSpaceTerminal(
1235+
bare_quoted_string[-1], 'fws')
12271236
else:
12281237
token, value = get_qcontent(value)
12291238
bare_quoted_string.append(token)

Lib/test/test_email/test_headerregistry.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,25 @@ def content_disp_as_value(self,
873873
{'filename': 'foo'},
874874
[errors.InvalidHeaderDefect]),
875875

876+
'invalid_parameter_value_with_fws_between_ew': (
877+
'attachment; filename="=?UTF-8?Q?Schulbesuchsbest=C3=A4ttigung=2E?='
878+
' =?UTF-8?Q?pdf?="',
879+
'attachment',
880+
{'filename': 'Schulbesuchsbestättigung.pdf'},
881+
[errors.InvalidHeaderDefect]*3,
882+
('attachment; filename="Schulbesuchsbestättigung.pdf"'),
883+
('Content-Disposition: attachment;\n'
884+
' filename*=utf-8\'\'Schulbesuchsbest%C3%A4ttigung.pdf\n'),
885+
),
886+
887+
'parameter_value_with_fws_between_tokens': (
888+
'attachment; filename="File =?utf-8?q?Name?= With Spaces.pdf"',
889+
'attachment',
890+
{'filename': 'File Name With Spaces.pdf'},
891+
[errors.InvalidHeaderDefect],
892+
'attachment; filename="File Name With Spaces.pdf"',
893+
('Content-Disposition: attachment; filename="File Name With Spaces.pdf"\n'),
894+
)
876895
}
877896

878897

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix parsing of invalid MIME header parameters by collapsing whitespace between
2+
encoded words in a bare-quoted-string.

0 commit comments

Comments
 (0)