Skip to content
This repository was archived by the owner on Sep 18, 2023. It is now read-only.

Commit 5bc696b

Browse files
Sanitise notification HTML
Summary: Qt labels support a HTML subset, using a completely internal parser in QTextDocument. The Notification spec support an even smaller subset of notification elements. It's important to strip out irrelevant tags that could potentially load remote information without user interaction, such as img src or even <b style="background:url... But we want to maintain the basic rich text formatting of bold and italics and links. This parser iterates reads the XML, copying only permissable tags and attributes. A future obvious improvement would be to merge the original regular expressions into this stream parser, but I'm trying to minimise breakages to get this into 5.12. Test Plan: Moved code into it's own class for easy unit testing Tried a bunch of things, including what the old regexes were doing Also ran notify send with a few options to make sure things worked Reviewers: #plasma, fvogt Reviewed By: fvogt Subscribers: aacid, fvogt, plasma-devel Tags: #plasma Differential Revision: https://phabricator.kde.org/D10188
1 parent 265ab95 commit 5bc696b

File tree

5 files changed

+219
-17
lines changed

5 files changed

+219
-17
lines changed

dataengines/notifications/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ set(notifications_engine_SRCS
44
notificationsengine.cpp
55
notificationservice.cpp
66
notificationaction.cpp
7+
notificationsanitizer.cpp
78
)
89

910
qt5_add_dbus_adaptor( notifications_engine_SRCS org.freedesktop.Notifications.xml notificationsengine.h NotificationsEngine )
@@ -26,3 +27,10 @@ kcoreaddons_desktop_to_json(plasma_engine_notifications plasma-dataengine-notifi
2627
install(TARGETS plasma_engine_notifications DESTINATION ${KDE_INSTALL_PLUGINDIR}/plasma/dataengine)
2728
install(FILES plasma-dataengine-notifications.desktop DESTINATION ${KDE_INSTALL_KSERVICES5DIR} )
2829
install(FILES notifications.operations DESTINATION ${PLASMA_DATA_INSTALL_DIR}/services)
30+
31+
32+
#unit test
33+
34+
add_executable(notification_test notificationsanitizer.cpp notifications_test.cpp)
35+
target_link_libraries(notification_test Qt5::Test Qt5::Core)
36+
ecm_mark_as_test(notification_test)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#include <QtTest>
2+
#include <QObject>
3+
#include <QDebug>
4+
#include "notificationsanitizer.h"
5+
6+
class NotificationTest : public QObject
7+
{
8+
Q_OBJECT
9+
public:
10+
NotificationTest() {}
11+
private Q_SLOTS:
12+
void parse_data();
13+
void parse();
14+
};
15+
16+
void NotificationTest::parse_data()
17+
{
18+
QTest::addColumn<QString>("messageIn");
19+
QTest::addColumn<QString>("expectedOut");
20+
21+
QTest::newRow("basic no HTML") << "I am a notification" << "I am a notification";
22+
QTest::newRow("whitespace") << " I am a notification " << "I am a notification";
23+
24+
QTest::newRow("basic html") << "I am <b>the</b> notification" << "I am <b>the</b> notification";
25+
QTest::newRow("nested html") << "I am <i><b>the</b></i> notification" << "I am <i><b>the</b></i> notification";
26+
27+
QTest::newRow("no extra tags") << "I am <blink>the</blink> notification" << "I am the notification";
28+
QTest::newRow("no extra attrs") << "I am <b style=\"font-weight:20\">the</b> notification" << "I am <b>the</b> notification";
29+
30+
QTest::newRow("newlines") << "I am\nthe\nnotification" << "I am<br/>the<br/>notification";
31+
QTest::newRow("multinewlines") << "I am\n\nthe\n\n\nnotification" << "I am<br/>the<br/>notification";
32+
33+
QTest::newRow("amp") << "me&you" << "me&amp;you";
34+
QTest::newRow("double escape") << "foo &amp; &lt;bar&gt;" << "foo &amp; &lt;bar&gt;";
35+
36+
QTest::newRow("quotes") << "&apos;foo&apos;" << "'foo'";//as label can't handle this normally valid entity
37+
38+
QTest::newRow("image normal") << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"/> and more text" << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"/> and more text";
39+
40+
//this input is technically wrong, so the output is also wrong, but QTextHtmlParser does the "right" thing
41+
QTest::newRow("image normal no close") << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"> and more text" << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"> and more text</img>";
42+
43+
QTest::newRow("image remote URL") << "This is <img src=\"http://foo.com/boo.png\" alt=\"cheese\" /> and more text" << "This is <img alt=\"cheese\"/> and more text";
44+
45+
//more bad formatted options. To some extent actual output doesn't matter. Garbage in, garbabe out.
46+
//the important thing is that it doesn't contain anything that could be parsed as the remote URL
47+
QTest::newRow("image remote URL no close") << "This is <img src=\"http://foo.com/boo.png>\" alt=\"cheese\"> and more text" << "This is <img alt=\"cheese\"> and more text</img>";
48+
QTest::newRow("image remote URL double open") << "This is <<img src=\"http://foo.com/boo.png>\" and more text" << "This is ";
49+
QTest::newRow("image remote URL no entitiy close") << "This is <img src=\"http://foo.com/boo.png\" and more text" << "This is ";
50+
QTest::newRow("image remote URL space in element name") << "This is < img src=\"http://foo.com/boo.png\" alt=\"cheese\" /> and more text" << "This is ";
51+
52+
QTest::newRow("link") << "This is a link <a href=\"http://foo.com/boo\"/> and more text" << "This is a link <a href=\"http://foo.com/boo\"/> and more text";
53+
}
54+
55+
void NotificationTest::parse()
56+
{
57+
QFETCH(QString, messageIn);
58+
QFETCH(QString, expectedOut);
59+
60+
const QString out = NotificationSanitizer::parse(messageIn);
61+
expectedOut = "<?xml version=\"1.0\"?><html>" + expectedOut + "</html>\n";
62+
QCOMPARE(out, expectedOut);
63+
}
64+
65+
66+
QTEST_GUILESS_MAIN(NotificationTest)
67+
68+
#include "notifications_test.moc"
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* Copyright (C) 2017 David Edmundson <davidedmundson@kde.org>
3+
*
4+
* This program is free software you can redistribute it and/or
5+
* modify it under the terms of the GNU Library General Public
6+
* License as published by the Free Software Foundation; either
7+
* version 2 of the License, or (at your option) any later version.
8+
*
9+
* This program is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+
* Library General Public License for more details.
13+
*
14+
* You should have received a copy of the GNU Library General Public License
15+
* along with this library; see the file COPYING.LIB. If not, write to
16+
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17+
* Boston, MA 02110-1301, USA.
18+
*/
19+
20+
#include "notificationsanitizer.h"
21+
22+
#include <QXmlStreamReader>
23+
#include <QXmlStreamWriter>
24+
#include <QRegularExpression>
25+
#include <QDebug>
26+
#include <QUrl>
27+
28+
QString NotificationSanitizer::parse(const QString &text)
29+
{
30+
// replace all \ns with <br/>
31+
QString t = text;
32+
33+
t.replace(QLatin1String("\n"), QStringLiteral("<br/>"));
34+
// Now remove all inner whitespace (\ns are already <br/>s)
35+
t = t.simplified();
36+
// Finally, check if we don't have multiple <br/>s following,
37+
// can happen for example when "\n \n" is sent, this replaces
38+
// all <br/>s in succsession with just one
39+
t.replace(QRegularExpression(QStringLiteral("<br/>\\s*<br/>(\\s|<br/>)*")), QLatin1String("<br/>"));
40+
// This fancy RegExp escapes every occurence of & since QtQuick Text will blatantly cut off
41+
// text where it finds a stray ampersand.
42+
// Only &{apos, quot, gt, lt, amp}; as well as &#123 character references will be allowed
43+
t.replace(QRegularExpression(QStringLiteral("&(?!(?:apos|quot|[gl]t|amp);|#)")), QLatin1String("&amp;"));
44+
45+
QXmlStreamReader r(QStringLiteral("<html>") + t + QStringLiteral("</html>"));
46+
QString result;
47+
QXmlStreamWriter out(&result);
48+
49+
const QVector<QString> allowedTags = {"b", "i", "u", "img", "a", "html", "br"};
50+
51+
out.writeStartDocument();
52+
while (!r.atEnd()) {
53+
r.readNext();
54+
55+
if (r.tokenType() == QXmlStreamReader::StartElement) {
56+
const QString name = r.name().toString();
57+
if (!allowedTags.contains(name)) {
58+
continue;
59+
}
60+
out.writeStartElement(name);
61+
if (name == QLatin1String("img")) {
62+
auto src = r.attributes().value("src").toString();
63+
auto alt = r.attributes().value("alt").toString();
64+
65+
const QUrl url(src);
66+
if (url.isLocalFile()) {
67+
out.writeAttribute(QStringLiteral("src"), src);
68+
} else {
69+
//image denied for security reasons! Do not copy the image src here!
70+
}
71+
72+
out.writeAttribute(QStringLiteral("alt"), alt);
73+
}
74+
if (name == QLatin1String("a")) {
75+
out.writeAttribute(QStringLiteral("href"), r.attributes().value("href").toString());
76+
}
77+
}
78+
79+
if (r.tokenType() == QXmlStreamReader::EndElement) {
80+
const QString name = r.name().toString();
81+
if (!allowedTags.contains(name)) {
82+
continue;
83+
}
84+
out.writeEndElement();
85+
}
86+
87+
if (r.tokenType() == QXmlStreamReader::Characters) {
88+
const auto text = r.text().toString();
89+
out.writeCharacters(text); //this auto escapes chars -> HTML entities
90+
}
91+
}
92+
out.writeEndDocument();
93+
94+
if (r.hasError()) {
95+
qWarning() << "Notification to send to backend contains invalid XML: "
96+
<< r.errorString() << "line" << r.lineNumber()
97+
<< "col" << r.columnNumber();
98+
}
99+
100+
// The Text.StyledText format handles only html3.2 stuff and &apos; is html4 stuff
101+
// so we need to replace it here otherwise it will not render at all.
102+
result = result.replace(QLatin1String("&apos;"), QChar('\''));
103+
104+
105+
return result;
106+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright (C) 2017 David Edmundson <davidedmundson@kde.org>
3+
*
4+
* This program is free software you can redistribute it and/or
5+
* modify it under the terms of the GNU Library General Public
6+
* License as published by the Free Software Foundation; either
7+
* version 2 of the License, or (at your option) any later version.
8+
*
9+
* This program is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+
* Library General Public License for more details.
13+
*
14+
* You should have received a copy of the GNU Library General Public License
15+
* along with this library; see the file COPYING.LIB. If not, write to
16+
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17+
* Boston, MA 02110-1301, USA.
18+
*/
19+
20+
#include <QString>
21+
22+
namespace NotificationSanitizer
23+
{
24+
/*
25+
* This turns generic random text of either plain text of any degree of faux-HTML into HTML allowed
26+
* in the notification spec namely:
27+
* a, img, b, i, u and br
28+
* All other tags and attributes are stripped
29+
* Whitespace is stripped and converted to <br/>
30+
* Double newlines are compressed
31+
*
32+
* Image src is only copied when referring to a local file
33+
*/
34+
QString parse(const QString &in);
35+
}

dataengines/notifications/notificationsengine.cpp

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "notificationsengine.h"
2121
#include "notificationservice.h"
2222
#include "notificationsadaptor.h"
23+
#include "notificationsanitizer.h"
2324

2425
#include <QDebug>
2526
#include <KConfigGroup>
@@ -261,23 +262,7 @@ uint NotificationsEngine::Notify(const QString &app_name, uint replaces_id,
261262
const QString source = QStringLiteral("notification %1").arg(id);
262263

263264
QString bodyFinal = (partOf == 0 ? body : _body);
264-
// First trim whitespace from beginning and end
265-
bodyFinal = bodyFinal.trimmed();
266-
// Now replace all \ns with <br/>
267-
bodyFinal = bodyFinal.replace(QLatin1String("\n"), QLatin1String("<br/>"));
268-
// Now remove all inner whitespace (\ns are already <br/>s
269-
bodyFinal = bodyFinal.simplified();
270-
// Finally, check if we don't have multiple <br/>s following,
271-
// can happen for example when "\n \n" is sent, this replaces
272-
// all <br/>s in succsession with just one
273-
bodyFinal.replace(QRegularExpression(QStringLiteral("<br/>\\s*<br/>(\\s|<br/>)*")), QLatin1String("<br/>"));
274-
// This fancy RegExp escapes every occurence of & since QtQuick Text will blatantly cut off
275-
// text where it finds a stray ampersand.
276-
// Only &{apos, quot, gt, lt, amp}; as well as &#123 character references will be allowed
277-
bodyFinal.replace(QRegularExpression(QStringLiteral("&(?!(?:apos|quot|[gl]t|amp);|#)")), QLatin1String("&amp;"));
278-
// The Text.StyledText format handles only html3.2 stuff and &apos; is html4 stuff
279-
// so we need to replace it here otherwise it will not render at all.
280-
bodyFinal.replace(QLatin1String("&apos;"), QChar('\''));
265+
bodyFinal = NotificationSanitizer::parse(bodyFinal);
281266

282267
Plasma::DataEngine::Data notificationData;
283268
notificationData.insert(QStringLiteral("id"), QString::number(id));

0 commit comments

Comments
 (0)