Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions Lib/email/contentmanager.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import binascii
import email.charset
import email.message
Expand Down Expand Up @@ -142,13 +143,15 @@ def _encode_base64(data, max_line_length):


def _encode_text(string, charset, cte, policy):
# max_line_length 0/None means no limit, ie: infinitely long.
maxlen = policy.max_line_length or sys.maxsize
lines = string.encode(charset).splitlines()
linesep = policy.linesep.encode('ascii')
def embedded_body(lines): return linesep.join(lines) + linesep
def normal_body(lines): return b'\n'.join(lines) + b'\n'
if cte is None:
# Use heuristics to decide on the "best" encoding.
if max((len(x) for x in lines), default=0) <= policy.max_line_length:
if max((len(x) for x in lines), default=0) <= maxlen:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use: max(map(len, lines), default=0), though I'm not sure which is the fastest and which is the most "readable".

@bitdancer What is the style you'd recommend in general in the email package? would you rather for readability, succintness, or performance?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just my comment: we're not required to take the fastest route here, since we may not have a heavy load on the email module. Still wish to hear more comments before we resolve this :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, mailman can put a pretty heavy load on the email package, and it is one of the biggest consumers of the package. While I know there are opportunities for performance improvements in the new email code that I haven't gotten around to investigating, I personally tend to do what others call micro-optimizations when I notice them[*], unless they make the code really hard to understand. Not that I know which is faster, here.

[*] I read an article once where Knuth felt his comment had been misinterpreted, and pointed out that if you routinely ignore optimization opportunities you pretty quickly end up with a really slow program.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[*] I read an article once where Knuth felt his comment had been misinterpreted, and pointed out that if you routinely ignore optimization opportunities, you pretty quickly end up with a really slow program.

@bitdancer Thanks for sharing the additional context — it’s really helpful to know there’s a concrete performance impact for real users like Mailman.

I agree that starting with small, focused micro-optimizations makes sense, especially since we’re planning a broader optimization pass for the email module.

I’ve incorporated your suggestion and updated the comment in the code accordingly.
Please also let me know if you have any specific areas you think would benefit most from further optimization work. Hope to collaborate on more work with you!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FTR, I don't think this will change much because we've seen times when code is slower with an map() and sometimes slower with a comprehension. If we want to make the code faster, we should run better benchmarks.

try:
return '7bit', normal_body(lines).decode('ascii')
except UnicodeDecodeError:
Expand All @@ -157,7 +160,7 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n'
return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
sniff = embedded_body(lines[:10])
sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
policy.max_line_length)
maxlen)
sniff_base64 = binascii.b2a_base64(sniff)
# This is a little unfair to qp; it includes lineseps, base64 doesn't.
if len(sniff_qp) > len(sniff_base64):
Expand All @@ -172,9 +175,9 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n'
data = normal_body(lines).decode('ascii', 'surrogateescape')
elif cte == 'quoted-printable':
data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
policy.max_line_length)
maxlen)
elif cte == 'base64':
data = _encode_base64(embedded_body(lines), policy.max_line_length)
data = _encode_base64(embedded_body(lines), maxlen)
else:
raise ValueError("Unknown content transfer encoding {}".format(cte))
return cte, data
Expand Down
28 changes: 28 additions & 0 deletions Lib/test/test_email/test_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,34 @@ def test_folding_with_long_nospace_http_policy_1(self):
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])

def test_no_wrapping_with_zero_max_line_length(self):
pol = policy.default.clone(max_line_length=0)
subj = "S" * 100
msg = EmailMessage(policy=pol)
msg["From"] = "a@ex.com"
msg["To"] = "b@ex.com"
msg["Subject"] = subj

raw = msg.as_bytes()
self.assertNotIn(b"\r\n ", raw, "Found fold indicator; wrapping not disabled")

parsed = message_from_bytes(raw, policy=policy.default)
self.assertEqual(parsed["Subject"], subj)

def test_no_wrapping_with_none_max_line_length(self):
pol = policy.default.clone(max_line_length=None)
subj = "S" * 100
body = "B" * 100
msg = EmailMessage(policy=pol)
msg["From"] = "a@ex.com"
msg["To"] = "b@ex.com"
msg["Subject"] = subj
msg.set_content(body)

parsed = message_from_bytes(msg.as_bytes(), policy=policy.default)
self.assertEqual(parsed["Subject"], subj)
self.assertEqual(parsed.get_body().get_content().rstrip('\n'), body)

def test_invalid_header_names(self):
invalid_headers = [
('Invalid Header', 'contains space'),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:mod:`email`: Ensure policy accepts unlimited line lengths by
treating 0 or :const:`None` as :data:`sys.maxsize`.
Loading