Skip to content

Commit

Permalink
util: escape lone surrogate code points using .inspect()
Browse files Browse the repository at this point in the history
Unpaired surrogate code points have no representation in UTF-8.
Printing them therefore only produces unreadable, seemingly "random"
output. Instead, escape such code points, similar to how C0 and C1
control characters are escaped.

Refs: https://unicodebook.readthedocs.io/unicode_encodings.html#utf-16-surrogate-pairs

Signed-off-by: Ruben Bridgewater <ruben@bridgewater.de>

PR-URL: nodejs#41001
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
BridgeAR authored Dec 6, 2021
1 parent 2f5e4b4 commit 7637260
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 8 deletions.
28 changes: 20 additions & 8 deletions lib/internal/util/inspect.js
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,10 @@ const kArrayType = 1;
const kArrayExtrasType = 2;

/* eslint-disable no-control-regex */
const strEscapeSequencesRegExp = /[\x00-\x1f\x27\x5c\x7f-\x9f]/;
const strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]/g;
const strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]/;
const strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]/g;
// Each pattern matches one code unit that has to be escaped:
//   - \x00-\x1f and \x7f-\x9f (control characters), \x5c (backslash) and,
//     in the non-"Single" variants only, \x27 (single quote);
//   - a high surrogate (\ud800-\udbff) that is NOT followed by a low
//     surrogate (negative lookahead);
//   - a low surrogate (\udc00-\udfff) that is NOT preceded by a high
//     surrogate (negative lookbehind).
// Well-formed surrogate pairs therefore never match. The `...RegExp`
// variants are non-global; the `...Replacer` variants carry the `g` flag.
const strEscapeSequencesRegExp = /[\x00-\x1f\x27\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/;
const strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/g;
const strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/;
const strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/g;
/* eslint-enable no-control-regex */

const keyStrRegExp = /^[a-zA-Z_][a-zA-Z_0-9]*$/;
Expand Down Expand Up @@ -463,7 +463,10 @@ function addQuotes(str, quotes) {
return `'${str}'`;
}

const escapeFn = (str) => meta[StringPrototypeCharCodeAt(str)];
/**
 * Returns the escaped representation for the char code of `str`.
 *
 * No index is passed to `StringPrototypeCharCodeAt`, so presumably only the
 * first code unit of `str` is considered.
 *
 * @param {string} str The string whose char code should be escaped.
 * @returns {string} The matching `meta` entry when the table has a slot for
 *   the char code, otherwise a `\u` escape followed by the char code in
 *   hexadecimal.
 */
function escapeFn(str) {
  const code = StringPrototypeCharCodeAt(str);
  // Char codes covered by the lookup table use their predefined escape.
  if (code < meta.length) {
    return meta[code];
  }
  // Everything beyond the table is written as a hexadecimal `\u` escape.
  const hex = code.toString(16);
  return `\\u${hex}`;
}

// Escape control characters, single quotes and the backslash.
// This is similar to JSON stringify escaping.
Expand Down Expand Up @@ -501,8 +504,7 @@ function strEscape(str) {

let result = '';
let last = 0;
const lastIndex = str.length;
for (let i = 0; i < lastIndex; i++) {
for (let i = 0; i < str.length; i++) {
const point = StringPrototypeCharCodeAt(str, i);
if (point === singleQuote ||
point === 92 ||
Expand All @@ -514,10 +516,20 @@ function strEscape(str) {
result += `${StringPrototypeSlice(str, last, i)}${meta[point]}`;
}
last = i + 1;
} else if (point >= 0xd800 && point <= 0xdfff) {
if (point <= 0xdbff && i + 1 < str.length) {
const point = StringPrototypeCharCodeAt(str, i + 1);
if (point >= 0xdc00 && point <= 0xdfff) {
i++;
continue;
}
}
result += `${StringPrototypeSlice(str, last, i)}${`\\u${point.toString(16)}`}`;
last = i + 1;
}
}

if (last !== lastIndex) {
if (last !== str.length) {
result += StringPrototypeSlice(str, last);
}
return addQuotes(result, singleQuote);
Expand Down
42 changes: 42 additions & 0 deletions test/parallel/test-util-inspect.js
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,48 @@ assert.strictEqual(util.inspect(Object.create(Date.prototype)), 'Date {}');
);
}

// Escape unpaired (lone) surrogate code units, leave well-formed pairs alone.
{
  // A regular, non-surrogate character located below the surrogate range
  // (0xD800-0xDFFF). It is placed next to surrogates to verify that
  // neighboring characters are never escaped by accident.
  const edgeChar = String.fromCharCode(0xd799);

  // The upper bound is inclusive so that the very last low surrogate
  // (0xDFFF) is exercised as well.
  for (let charCode = 0xD800; charCode <= 0xDFFF; charCode++) {
    const surrogate = String.fromCharCode(charCode);

    // A lone surrogate is escaped on its own ...
    assert.strictEqual(
      util.inspect(surrogate),
      `'\\u${charCode.toString(16)}'`
    );
    // ... at the end of a long string ...
    assert.strictEqual(
      util.inspect(`${'a'.repeat(200)}${surrogate}`),
      `'${'a'.repeat(200)}\\u${charCode.toString(16)}'`
    );
    // ... and at the start of a long string.
    assert.strictEqual(
      util.inspect(`${surrogate}${'a'.repeat(200)}`),
      `'\\u${charCode.toString(16)}${'a'.repeat(200)}'`
    );
    if (charCode < 0xdc00) {
      // High surrogate: adding 1024 (0x400) yields a low surrogate, so the
      // two together form a well-formed pair.
      const highSurrogate = surrogate;
      const lowSurrogate = String.fromCharCode(charCode + 1024);
      // A well-formed pair must not be escaped at all.
      assert(
        !util.inspect(
          `${edgeChar}${highSurrogate}${lowSurrogate}${edgeChar}`
        ).includes('\\u')
      );
      // Only the leading, unpaired high surrogate is escaped; the pair that
      // follows stays intact.
      assert.strictEqual(
        (util.inspect(
          `${highSurrogate}${highSurrogate}${lowSurrogate}`
        ).match(/\\u/g) ?? []).length,
        1
      );
    } else {
      // Low surrogate without a preceding high surrogate: it is escaped
      // while the surrounding characters stay as they are.
      assert.strictEqual(
        util.inspect(`${edgeChar}${surrogate}${edgeChar}`),
        `'${edgeChar}\\u${charCode.toString(16)}${edgeChar}'`
      );
    }
  }
}

// Test util.inspect.styles and util.inspect.colors.
{
function testColorStyle(style, input, implicit) {
Expand Down

0 comments on commit 7637260

Please sign in to comment.