Skip to content

Commit eb1cdc1

Browse files
committed
Fix for email.generator.Generator with whitespace between encoded words.
email.generator.Generator currently does not handle whitespace between encoded words correctly when the encoded words span multiple lines. The current generator creates one encoded word for each line. If the end of a line happens to coincide with the end of a real word in the plaintext, the generator places an unencoded space at the start of the subsequent line to represent the whitespace between the plaintext words. A compliant decoder strips all whitespace between two adjacent encoded words (RFC 2047, section 6.2), which leads to missing spaces in the round-tripped output. The fix is to make sure that whitespace between two encoded words ends up inside one or the other of the encoded words. This fix places the space inside the second encoded word. Test case from #92081.
1 parent feca9bb commit eb1cdc1

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

Lib/email/_header_value_parser.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2766,10 +2766,11 @@ def _refold_parse_tree(parse_tree, *, policy):
27662766
# max_line_length 0/None means no limit, ie: infinitely long.
27672767
maxlen = policy.max_line_length or sys.maxsize
27682768
encoding = 'utf-8' if policy.utf8 else 'us-ascii'
2769-
lines = ['']
2770-
last_ew = None
2769+
lines = [''] # Folded lines to be output
2770+
last_ew = None # Points to the last encoded character if there's an ew on
2771+
# the line
27712772
wrap_as_ew_blocked = 0
2772-
want_encoding = False
2773+
want_encoding = False # True if we need to encode this part
27732774
end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked')
27742775
parts = list(parse_tree)
27752776
while parts:
@@ -2793,10 +2794,12 @@ def _refold_parse_tree(parse_tree, *, policy):
27932794
# 'charset' property on the policy.
27942795
charset = 'utf-8'
27952796
want_encoding = True
2797+
27962798
if part.token_type == 'mime-parameters':
27972799
# Mime parameter folding (using RFC2231) is extra special.
27982800
_fold_mime_parameters(part, lines, maxlen, encoding)
27992801
continue
2802+
28002803
if want_encoding and not wrap_as_ew_blocked:
28012804
if not part.as_ew_allowed:
28022805
want_encoding = False
@@ -2826,6 +2829,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28262829
part.ew_combine_allowed, charset)
28272830
want_encoding = False
28282831
continue
2832+
28292833
if len(tstr) <= maxlen - len(lines[-1]):
28302834
lines[-1] += tstr
28312835
continue
@@ -2860,6 +2864,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28602864
else:
28612865
# We can't fold it onto the next line either...
28622866
lines[-1] += tstr
2867+
28632868
return policy.linesep.join(lines) + policy.linesep
28642869

28652870
def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
@@ -2877,14 +2882,15 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
28772882
to_encode = str(
28782883
get_unstructured(lines[-1][last_ew:] + to_encode))
28792884
lines[-1] = lines[-1][:last_ew]
2880-
if to_encode[0] in WSP:
2885+
elif to_encode[0] in WSP:
28812886
# We're joining this to non-encoded text, so don't encode
28822887
# the leading blank.
28832888
leading_wsp = to_encode[0]
28842889
to_encode = to_encode[1:]
28852890
if (len(lines[-1]) == maxlen):
28862891
lines.append(_steal_trailing_WSP_if_exists(lines))
28872892
lines[-1] += leading_wsp
2893+
28882894
trailing_wsp = ''
28892895
if to_encode[-1] in WSP:
28902896
# Likewise for the trailing space.

Lib/test/test_email/test_generator.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,19 @@ class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
232232
ioclass = io.BytesIO
233233
typ = lambda self, x: x.encode('ascii')
234234

235+
def test_defaults_handle_spaces_between_encoded_words_when_wrapped(self):
236+
source = ("Уведомление о принятии в работу обращения для"
237+
" подключения услуги")
238+
expected = ('Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtSDQviDQv9GA0LjQvdGP0YLQuNC4?=\n'
239+
' =?utf-8?b?INCyINGA0LDQsdC+0YLRgyDQvtCx0YDQsNGJ0LXQvdC40Y8g0LTQu9GPINC/0L4=?=\n'
240+
' =?utf-8?b?0LTQutC70Y7Rh9C10L3QuNGPINGD0YHQu9GD0LPQuA==?=\n\n').encode('ascii')
241+
msg = EmailMessage()
242+
msg['Subject'] = source
243+
s = io.BytesIO()
244+
g = BytesGenerator(s)
245+
g.flatten(msg)
246+
self.assertEqual(s.getvalue(), expected)
247+
235248
def test_cte_type_7bit_handles_unknown_8bit(self):
236249
source = ("Subject: Maintenant je vous présente mon "
237250
"collègue\n\n").encode('utf-8')

0 commit comments

Comments
 (0)