Deprecate HTML auto-merging from core (plugin may be needed) #2889
Description
Background from isagalaev in #2529:
I was walking by, and while I didn't read through the entire thing I wanted to say this. There were a few features added to the core at users' requests when it was all small an nimble. These days I would say it makes much more sense to be opinionated and remove corner cases like useBR, tabWidth and several different spellings of "lang-", "language-" as language prefixes in the class name (as far as I remember there is a very precise recommendation in HTML5 to use "language-"). Everyone with special cases would be expected to do pre- or post-processing.
Very early on we were perhaps a bit too open and allowed some things to slip into core that perhaps should not have - and today they are all things that could easily be handled with our new plugin API.
HTML merging is a large swath of complexity (see utils.js
) that a few use for a corner case like:
var x;
<span class="important">var y;</span>
Which will be highlighted as (the HTML "passes thru"):
<span class="keyword">var</span> x;
<span class="important"><span class="keyword">var</span> y;</span>
Sadly, this feature also makes it impossible for us to detect HTML injection type mistakes (which we could otherwise detect and warn about)... #2886 HTML (generally) should not exist inside a code block - it should always be escaped. Its existence possibly indicates an HTML injection style vulnerability. If we remove this final edge case for HTML then we can inform users about the potential vulnerability vs letting them potentially shoot themselves in the foot.
This can now be easily achieved via a "after:highlightBlock" plugin and that's what should happen for anyone who absolutely needs this functionality.
- v10.5 Deprecate HTML merging (chore(plugins) move useBR, tabReplace, and HTML merging into internal plugins #2873)
- Someone may want to provide a plugin
- v11 Remove HTML merging
You can restore this functionality via a plugin. Below is plugin that we shipped as part of our own source briefly after this was ported to a plugin to make later extraction easy.
A maintainer (someone who wants to package this up as an official plugin and maintain it) is needed.
Usage:
hljs.addPlugin(mergeHTMLPlugin);
Plugin source:
var mergeHTMLPlugin = (function () {
'use strict';
var originalStream;
/**
* @param {string} value
* @returns {string}
*/
function escapeHTML(value) {
return value
.replace(/&/g, '&')
.replace(/</g, '<')
.replace(/>/g, '>')
.replace(/"/g, '"')
.replace(/'/g, ''');
}
/* plugin itself */
/** @type {HLJSPlugin} */
const mergeHTMLPlugin = {
// preserve the original HTML token stream
"before:highlightElement": ({ el }) => {
originalStream = nodeStream(el);
},
// merge it afterwards with the highlighted token stream
"after:highlightElement": ({ el, result, text }) => {
if (!originalStream.length) return;
const resultNode = document.createElement('div');
resultNode.innerHTML = result.value;
result.value = mergeStreams(originalStream, nodeStream(resultNode), text);
el.innerHTML = result.value;
}
};
/* Stream merging support functions */
/**
* @typedef Event
* @property {'start'|'stop'} event
* @property {number} offset
* @property {Node} node
*/
/**
* @param {Node} node
*/
function tag(node) {
return node.nodeName.toLowerCase();
}
/**
* @param {Node} node
*/
function nodeStream(node) {
/** @type Event[] */
const result = [];
(function _nodeStream(node, offset) {
for (let child = node.firstChild; child; child = child.nextSibling) {
if (child.nodeType === 3) {
offset += child.nodeValue.length;
} else if (child.nodeType === 1) {
result.push({
event: 'start',
offset: offset,
node: child
});
offset = _nodeStream(child, offset);
// Prevent void elements from having an end tag that would actually
// double them in the output. There are more void elements in HTML
// but we list only those realistically expected in code display.
if (!tag(child).match(/br|hr|img|input/)) {
result.push({
event: 'stop',
offset: offset,
node: child
});
}
}
}
return offset;
})(node, 0);
return result;
}
/**
* @param {any} original - the original stream
* @param {any} highlighted - stream of the highlighted source
* @param {string} value - the original source itself
*/
function mergeStreams(original, highlighted, value) {
let processed = 0;
let result = '';
const nodeStack = [];
function selectStream() {
if (!original.length || !highlighted.length) {
return original.length ? original : highlighted;
}
if (original[0].offset !== highlighted[0].offset) {
return (original[0].offset < highlighted[0].offset) ? original : highlighted;
}
/*
To avoid starting the stream just before it should stop the order is
ensured that original always starts first and closes last:
if (event1 == 'start' && event2 == 'start')
return original;
if (event1 == 'start' && event2 == 'stop')
return highlighted;
if (event1 == 'stop' && event2 == 'start')
return original;
if (event1 == 'stop' && event2 == 'stop')
return highlighted;
... which is collapsed to:
*/
return highlighted[0].event === 'start' ? original : highlighted;
}
/**
* @param {Node} node
*/
function open(node) {
/** @param {Attr} attr */
function attributeString(attr) {
return ' ' + attr.nodeName + '="' + escapeHTML(attr.value) + '"';
}
// @ts-ignore
result += '<' + tag(node) + [].map.call(node.attributes, attributeString).join('') + '>';
}
/**
* @param {Node} node
*/
function close(node) {
result += '</' + tag(node) + '>';
}
/**
* @param {Event} event
*/
function render(event) {
(event.event === 'start' ? open : close)(event.node);
}
while (original.length || highlighted.length) {
let stream = selectStream();
result += escapeHTML(value.substring(processed, stream[0].offset));
processed = stream[0].offset;
if (stream === original) {
/*
On any opening or closing tag of the original markup we first close
the entire highlighted node stack, then render the original tag along
with all the following original tags at the same offset and then
reopen all the tags on the highlighted stack.
*/
nodeStack.reverse().forEach(close);
do {
render(stream.splice(0, 1)[0]);
stream = selectStream();
} while (stream === original && stream.length && stream[0].offset === processed);
nodeStack.reverse().forEach(open);
} else {
if (stream[0].event === 'start') {
nodeStack.push(stream[0].node);
} else {
nodeStack.pop();
}
render(stream.splice(0, 1)[0]);
}
}
return result + escapeHTML(value.substr(processed));
}
return mergeHTMLPlugin;
}());
This was built with:
rollup src/plugins/merge_html.js -f iife --output.name=mergeHTMLPlugin
After changing the source to have a single default export.