8080# Useful constants and functions
8181#
8282
83- WSP = set (' \t ' )
83+ _WSP = ' \t '
84+ WSP = set (_WSP )
8485CFWS_LEADER = WSP | set ('(' )
8586SPECIALS = set (r'()<>@,:;.\"[]' )
8687ATOM_ENDS = SPECIALS | WSP
@@ -2835,6 +2836,7 @@ def _steal_trailing_WSP_if_exists(lines):
28352836 lines .pop ()
28362837 return wsp
28372838
2839+
28382840def _refold_parse_tree (parse_tree , * , policy ):
28392841 """Return string of contents of parse_tree folded according to RFC rules.
28402842
@@ -2843,11 +2845,9 @@ def _refold_parse_tree(parse_tree, *, policy):
28432845 maxlen = policy .max_line_length or sys .maxsize
28442846 encoding = 'utf-8' if policy .utf8 else 'us-ascii'
28452847 lines = ['' ] # Folded lines to be output
2846- leading_whitespace = '' # When we have whitespace between two encoded
2847- # words, we may need to encode the whitespace
2848- # at the beginning of the second word.
2849- last_ew = None # Points to the last encoded character if there's an ew on
2850- # the line
2848+ last_word_is_ew = False
2849+ last_ew = None # if there is an encoded word in the last line of lines,
2850+ # this is the index in that line of the encoded word's first character
28512851 last_charset = None
28522852 wrap_as_ew_blocked = 0
28532853 want_encoding = False # This is set to True if we need to encode this part
@@ -2882,6 +2882,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28822882 if part .token_type == 'mime-parameters' :
28832883 # Mime parameter folding (using RFC2231) is extra special.
28842884 _fold_mime_parameters (part , lines , maxlen , encoding )
2885+ last_word_is_ew = False
28852886 continue
28862887
28872888 if want_encoding and not wrap_as_ew_blocked :
@@ -2898,6 +2899,7 @@ def _refold_parse_tree(parse_tree, *, policy):
28982899 # XXX what if encoded_part has no leading FWS?
28992900 lines .append (newline )
29002901 lines [- 1 ] += encoded_part
2902+ last_word_is_ew = False
29012903 continue
29022904 # Either this is not a major syntactic break, so we don't
29032905 # want it on a line by itself even if it fits, or it
@@ -2916,11 +2918,16 @@ def _refold_parse_tree(parse_tree, *, policy):
29162918 (last_charset == 'unknown-8bit' or
29172919 last_charset == 'utf-8' and charset != 'us-ascii' )):
29182920 last_ew = None
2919- last_ew = _fold_as_ew (tstr , lines , maxlen , last_ew ,
2920- part .ew_combine_allowed , charset , leading_whitespace )
2921- # This whitespace has been added to the lines in _fold_as_ew()
2922- # so clear it now.
2923- leading_whitespace = ''
2921+ last_ew = _fold_as_ew (
2922+ tstr ,
2923+ lines ,
2924+ maxlen ,
2925+ last_ew ,
2926+ part .ew_combine_allowed ,
2927+ charset ,
2928+ last_word_is_ew ,
2929+ )
2930+ last_word_is_ew = True
29242931 last_charset = charset
29252932 want_encoding = False
29262933 continue
@@ -2933,28 +2940,19 @@ def _refold_parse_tree(parse_tree, *, policy):
29332940
29342941 if len (tstr ) <= maxlen - len (lines [- 1 ]):
29352942 lines [- 1 ] += tstr
2943+ last_word_is_ew = last_word_is_ew and not bool (tstr .strip (_WSP ))
29362944 continue
29372945
29382946 # This part is too long to fit. The RFC wants us to break at
29392947 # "major syntactic breaks", so unless we don't consider this
29402948 # to be one, check if it will fit on the next line by itself.
2941- leading_whitespace = ''
29422949 if (part .syntactic_break and
29432950 len (tstr ) + 1 <= maxlen ):
29442951 newline = _steal_trailing_WSP_if_exists (lines )
29452952 if newline or part .startswith_fws ():
2946- # We're going to fold the data onto a new line here. Due to
2947- # the way encoded strings handle continuation lines, we need to
2948- # be prepared to encode any whitespace if the next line turns
2949- # out to start with an encoded word.
29502953 lines .append (newline + tstr )
2951-
2952- whitespace_accumulator = []
2953- for char in lines [- 1 ]:
2954- if char not in WSP :
2955- break
2956- whitespace_accumulator .append (char )
2957- leading_whitespace = '' .join (whitespace_accumulator )
2954+ last_word_is_ew = (last_word_is_ew
2955+ and not bool (lines [- 1 ].strip (_WSP )))
29582956 last_ew = None
29592957 continue
29602958 if not hasattr (part , 'encode' ):
@@ -2994,10 +2992,11 @@ def _refold_parse_tree(parse_tree, *, policy):
29942992 else :
29952993 # We can't fold it onto the next line either...
29962994 lines [- 1 ] += tstr
2995+ last_word_is_ew = last_word_is_ew and not bool (tstr .strip (_WSP ))
29972996
29982997 return policy .linesep .join (lines ) + policy .linesep
29992998
3000- def _fold_as_ew (to_encode , lines , maxlen , last_ew , ew_combine_allowed , charset , leading_whitespace ):
2999+ def _fold_as_ew (to_encode , lines , maxlen , last_ew , ew_combine_allowed , charset , last_word_is_ew ):
30013000 """Fold string to_encode into lines as encoded word, combining if allowed.
30023001 Return the new value for last_ew, or None if ew_combine_allowed is False.
30033002
@@ -3012,6 +3011,16 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
30123011 to_encode = str (
30133012 get_unstructured (lines [- 1 ][last_ew :] + to_encode ))
30143013 lines [- 1 ] = lines [- 1 ][:last_ew ]
3014+ elif last_word_is_ew :
3015+ # If we are following up an encoded word with another encoded word,
3016+ # any whitespace between the two will be ignored when decoded.
3017+ # Therefore, we encode all to-be-displayed whitespace in the second
3018+ # encoded word.
3019+ len_without_wsp = len (lines [- 1 ].rstrip (_WSP ))
3020+ leading_whitespace = lines [- 1 ][len_without_wsp :]
3021+ lines [- 1 ] = (lines [- 1 ][:len_without_wsp ]
3022+ + (' ' if leading_whitespace else '' ))
3023+ to_encode = leading_whitespace + to_encode
30153024 elif to_encode [0 ] in WSP :
30163025 # We're joining this to non-encoded text, so don't encode
30173026 # the leading blank.
@@ -3040,20 +3049,13 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
30403049
30413050 while to_encode :
30423051 remaining_space = maxlen - len (lines [- 1 ])
3043- text_space = remaining_space - chrome_len - len ( leading_whitespace )
3052+ text_space = remaining_space - chrome_len
30443053 if text_space <= 0 :
3045- lines .append (' ' )
3054+ newline = _steal_trailing_WSP_if_exists (lines )
3055+ lines .append (newline or ' ' )
3056+ new_last_ew = len (lines [- 1 ])
30463057 continue
30473058
3048- # If we are at the start of a continuation line, prepend whitespace
3049- # (we only want to do this when the line starts with an encoded word
3050- # but if we're folding in this helper function, then we know that we
3051- # are going to be writing out an encoded word.)
3052- if len (lines ) > 1 and len (lines [- 1 ]) == 1 and leading_whitespace :
3053- encoded_word = _ew .encode (leading_whitespace , charset = encode_as )
3054- lines [- 1 ] += encoded_word
3055- leading_whitespace = ''
3056-
30573059 to_encode_word = to_encode [:text_space ]
30583060 encoded_word = _ew .encode (to_encode_word , charset = encode_as )
30593061 excess = len (encoded_word ) - remaining_space
@@ -3065,7 +3067,6 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
30653067 excess = len (encoded_word ) - remaining_space
30663068 lines [- 1 ] += encoded_word
30673069 to_encode = to_encode [len (to_encode_word ):]
3068- leading_whitespace = ''
30693070
30703071 if to_encode :
30713072 lines .append (' ' )
0 commit comments