|
| 1 | +From 16918c1df3e709df2a97281e3825d94c84edb668 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Christian Ehrlicher <ch.ehrlicher@gmx.de> |
| 3 | +Date: Tue, 06 Aug 2024 22:39:44 +0200 |
| 4 | +Subject: [PATCH] XML/QDom: speedup encodeText() |
| 5 | + |
| 6 | +The code copied the whole string, then replaced parts inline, at |
| 7 | +the cost of relocating everything beyond, at each replacement. |
| 8 | +Instead, copy character by character (in chunks where possible) |
| 9 | +and append replacements as we skip what they replace. |
| 10 | + |
| 11 | +Manual conflict resolution for 6.5: |
| 12 | +- This is a manual cherry-pick. The original change was only |
| 13 | + picked to 6.8, but the quadratic behavior is present in Qt 5, too. |
| 14 | +- Changed Task-number to Fixes: because this is the real fix; |
| 15 | + the QString change, 315210de916d060c044c01e53ff249d676122b1b, |
| 16 | + was unrelated to the original QTBUG-127549. |
| 17 | + |
| 18 | +Manual conflict resolution for 5.15: |
| 19 | +- Kept/re-added QTextCodec::canEncode() check |
| 20 | +- Ported from Qt 6 to 5, to wit: |
| 21 | + - qsizetype -> int |
| 22 | + - QStringView::first/sliced(n) -> left/mid(n) |
| 23 | + (these functions are clearly called in-range, so the widened |
| 24 | + contract of the Qt 5 functions doesn't matter) |
| 25 | +- Ported from C++17- and C++14-isms to C++11: |
| 26 | + - replaced polymorphic lambda with a normal one (this requires |
| 27 | + rewriting the !canEncode() branch to use QByteArray/QLatin1String |
| 28 | + instead of QString) |
| 29 | +- As a drive-by, corrected the indentation of the case labels to |
| 30 | + horizontally align existing code (and follow Qt style) |
| 31 | + |
| 32 | +Fixes: QTBUG-127549 |
| 33 | +Change-Id: I368482859ed0c4127f1eec2919183711b5488ada |
| 34 | +Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> |
| 35 | +(cherry picked from commit 2ce08e3671b8d18b0284447e5908ce15e6e8f80f) |
| 36 | +Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org> |
| 37 | +(cherry picked from commit 225e235cf966a44af23dbe9aaaa2fd20ab6430ee) |
| 38 | +Reviewed-by: Fabian Kosmale <fabian.kosmale@qt.io> |
| 39 | +(cherry picked from commit 905a5bd421efff6a1d90b6140500d134d32ca745) |
| 40 | +--- |
| 41 | + |
| 42 | +diff --git a/qtbase/src/xml/dom/qdom.cpp b/qtbase/src/xml/dom/qdom.cpp |
| 43 | +index 872221c..bf70477 100644 |
| 44 | +--- a/qtbase/src/xml/dom/qdom.cpp |
| 45 | ++++ b/qtbase/src/xml/dom/qdom.cpp |
| 46 | +@@ -3676,59 +3676,67 @@ |
| 47 | + const QTextCodec *const codec = s.codec(); |
| 48 | + Q_ASSERT(codec); |
| 49 | + #endif |
| 50 | +- QString retval(str); |
| 51 | +- int len = retval.length(); |
| 52 | +- int i = 0; |
| 53 | ++ QString retval; |
| 54 | ++ int start = 0; |
| 55 | ++ auto appendToOutput = [&](int cur, QLatin1String replacement) |
| 56 | ++ { |
| 57 | ++ if (start < cur) { |
| 58 | ++ retval.reserve(str.size() + replacement.size()); |
| 59 | ++ retval.append(QStringView(str).left(cur).mid(start)); |
| 60 | ++ } |
| 61 | ++ // Skip over str[cur], replaced by replacement |
| 62 | ++ start = cur + 1; |
| 63 | ++ retval.append(replacement); |
| 64 | ++ }; |
| 65 | + |
| 66 | +- while (i < len) { |
| 67 | +- const QChar ati(retval.at(i)); |
| 68 | +- |
| 69 | +- if (ati == QLatin1Char('<')) { |
| 70 | +- retval.replace(i, 1, QLatin1String("<")); |
| 71 | +- len += 3; |
| 72 | +- i += 4; |
| 73 | +- } else if (encodeQuotes && (ati == QLatin1Char('"'))) { |
| 74 | +- retval.replace(i, 1, QLatin1String(""")); |
| 75 | +- len += 5; |
| 76 | +- i += 6; |
| 77 | +- } else if (ati == QLatin1Char('&')) { |
| 78 | +- retval.replace(i, 1, QLatin1String("&")); |
| 79 | +- len += 4; |
| 80 | +- i += 5; |
| 81 | +- } else if (ati == QLatin1Char('>') && i >= 2 && retval[i - 1] == QLatin1Char(']') && retval[i - 2] == QLatin1Char(']')) { |
| 82 | +- retval.replace(i, 1, QLatin1String(">")); |
| 83 | +- len += 3; |
| 84 | +- i += 4; |
| 85 | +- } else if (performAVN && |
| 86 | +- (ati == QChar(0xA) || |
| 87 | +- ati == QChar(0xD) || |
| 88 | +- ati == QChar(0x9))) { |
| 89 | +- const QString replacement(QLatin1String("&#x") + QString::number(ati.unicode(), 16) + QLatin1Char(';')); |
| 90 | +- retval.replace(i, 1, replacement); |
| 91 | +- i += replacement.length(); |
| 92 | +- len += replacement.length() - 1; |
| 93 | +- } else if (encodeEOLs && ati == QChar(0xD)) { |
| 94 | +- retval.replace(i, 1, QLatin1String("
")); // Replace a single 0xD with a ref for 0xD |
| 95 | +- len += 4; |
| 96 | +- i += 5; |
| 97 | +- } else { |
| 98 | ++ const int len = str.size(); |
| 99 | ++ for (int cur = 0; cur < len; ++cur) { |
| 100 | ++ switch (const char16_t ati = str[cur].unicode()) { |
| 101 | ++ case u'<': |
| 102 | ++ appendToOutput(cur, QLatin1String("<")); |
| 103 | ++ break; |
| 104 | ++ case u'"': |
| 105 | ++ if (encodeQuotes) |
| 106 | ++ appendToOutput(cur, QLatin1String(""")); |
| 107 | ++ break; |
| 108 | ++ case u'&': |
| 109 | ++ appendToOutput(cur, QLatin1String("&")); |
| 110 | ++ break; |
| 111 | ++ case u'>': |
| 112 | ++ if (cur >= 2 && str[cur - 1] == u']' && str[cur - 2] == u']') |
| 113 | ++ appendToOutput(cur, QLatin1String(">")); |
| 114 | ++ break; |
| 115 | ++ case u'\r': |
| 116 | ++ if (performAVN || encodeEOLs) |
| 117 | ++ appendToOutput(cur, QLatin1String("
")); // \r == 0x0d |
| 118 | ++ break; |
| 119 | ++ case u'\n': |
| 120 | ++ if (performAVN) |
| 121 | ++ appendToOutput(cur, QLatin1String("
")); // \n == 0x0a |
| 122 | ++ break; |
| 123 | ++ case u'\t': |
| 124 | ++ if (performAVN) |
| 125 | ++ appendToOutput(cur, QLatin1String("	")); // \t == 0x09 |
| 126 | ++ break; |
| 127 | ++ default: |
| 128 | + #if QT_CONFIG(textcodec) |
| 129 | + if(codec->canEncode(ati)) |
| 130 | +- ++i; |
| 131 | ++ ; // continue |
| 132 | + else |
| 133 | + #endif |
| 134 | + { |
| 135 | + // We have to use a character reference to get it through. |
| 136 | +- const ushort codepoint(ati.unicode()); |
| 137 | +- const QString replacement(QLatin1String("&#x") + QString::number(codepoint, 16) + QLatin1Char(';')); |
| 138 | +- retval.replace(i, 1, replacement); |
| 139 | +- i += replacement.length(); |
| 140 | +- len += replacement.length() - 1; |
| 141 | ++ const QByteArray replacement = "&#x" + QByteArray::number(uint{ati}, 16) + ';'; |
| 142 | ++ appendToOutput(cur, QLatin1String{replacement}); |
| 143 | + } |
| 144 | ++ break; |
| 145 | + } |
| 146 | + } |
| 147 | +- |
| 148 | +- return retval; |
| 149 | ++ if (start > 0) { |
| 150 | ++ retval.append(QStringView(str).left(len).mid(start)); |
| 151 | ++ return retval; |
| 152 | ++ } |
| 153 | ++ return str; |
| 154 | + } |
| 155 | + |
| 156 | + void QDomAttrPrivate::save(QTextStream& s, int, int) const |
0 commit comments