Skip to content

Commit

Permalink
There could still be long lines inside HTML tags (#204)
Browse files Browse the repository at this point in the history
* There could still be long lines inside HTML tags

So now it will reformat those lines before printing the HTML.

CPNHUB-132
  • Loading branch information
petrjasek authored Sep 14, 2022
1 parent 8b3892d commit 526a538
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 4 deletions.
12 changes: 9 additions & 3 deletions newsroom/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class EmailGroup(TypedDict):
emails: List[str]


MAX_LINE_LENGTH = 998 # RFC 5322
MAX_LINE_LENGTH = 998 - 50 # RFC 5322 - buffer for html indentation


def handle_long_lines_text(text):
Expand All @@ -47,7 +47,7 @@ def handle_long_lines_text(text):
next_line = ''
words = line.split()
for word in words:
if len(next_line) + len(word) + 1 > MAX_LINE_LENGTH:
if len(next_line) + len(word) > MAX_LINE_LENGTH:
output.append(next_line)
next_line = word
else:
Expand All @@ -63,7 +63,13 @@ def handle_long_lines_html(html):
if not any([len(line) > MAX_LINE_LENGTH for line in lines]):
return html
parsed = etree.fromstring(html, parser=etree.HTMLParser())
return etree.tounicode(parsed, method="html", pretty_print=True)
etree.indent(parsed, space=" ") # like pretty print but upfront
for elem in parsed.iter():
if elem.text is not None and len(elem.text) > MAX_LINE_LENGTH:
elem.text = handle_long_lines_text(elem.text) + "\n"
if elem.tail is not None and len(elem.tail) > MAX_LINE_LENGTH:
elem.tail = handle_long_lines_text(elem.tail) + "\n"
return etree.tostring(parsed, method="html", encoding="unicode")


@celery.task(soft_time_limit=120)
Expand Down
15 changes: 14 additions & 1 deletion tests/core/test_emails.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from flask import render_template_string, json, url_for
from jinja2 import TemplateNotFound

from newsroom.email import send_new_item_notification_email, map_email_recipients_by_language, EmailGroup, send_email
from newsroom.email import (
send_new_item_notification_email,
map_email_recipients_by_language,
EmailGroup,
send_email,
handle_long_lines_html,
)
from unittest import mock


Expand Down Expand Up @@ -164,6 +170,13 @@ def test_email_avoid_long_lines(client, app, mocker):
assert 501 == len(lines[2])


def test_handle_long_lines_html():
    """Check that over-long text nested inside HTML tags is re-wrapped.

    The input is a single line made of 50 repetitions of a fragment whose
    link text alone is 10,000 characters long; every line of the formatted
    output must be shorter than 998 characters.
    """
    link_text = "loong link" * 1000
    fragment = "foo bar <a href=\"test\">{}</a>baz".format(link_text)
    html = "<div><p>{}</p></div>".format(fragment * 50)

    formatted = handle_long_lines_html(html)

    for line in formatted.splitlines():
        # The offending line is passed as the assertion message to aid debugging.
        assert len(line) < 998, line


def check_lines_length(text, length=998):
lines = text.splitlines()
for line in lines:
Expand Down

0 comments on commit 526a538

Please sign in to comment.