Skip to content

Commit

Permalink
readline: improve getStringWidth()
Browse files Browse the repository at this point in the history
1. Simplify the getStringWidth function used by Intl builds by removing
   dead code (the options were unused) and by refactoring the logic.
2. Improve the getStringWidth unicode handling used by non-Intl builds.
   The getStringWidth function returned the wrong width for multiple
   inputs. It's now improved by supporting various zero width characters
   and more full width characters.

PR-URL: nodejs#31112
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
BridgeAR authored and targos committed Apr 25, 2020
1 parent fde18ac commit 7163b1e
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 101 deletions.
74 changes: 28 additions & 46 deletions lib/internal/readline/utils.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
'use strict';

const {
Boolean,
NumberIsInteger,
RegExp,
Symbol,
} = primordials;
Expand All @@ -21,7 +19,6 @@ const kEscape = '\x1b';
const kSubstringSearch = Symbol('kSubstringSearch');

let getStringWidth;
let isFullWidthCodePoint;

function CSI(strings, ...args) {
let ret = `${kEscape}[`;
Expand All @@ -41,63 +38,37 @@ CSI.kClearScreenDown = CSI`0J`;

if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
getStringWidth = function getStringWidth(str, options) {
options = options || {};
if (NumberIsInteger(str)) {
// Provide information about the character with code point 'str'.
return icu.getStringWidth(
str,
Boolean(options.ambiguousAsFullWidth),
false
);
}
str = stripVTControlCharacters(String(str));
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
// Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true;
getStringWidth = function getStringWidth(str) {
let width = 0;
str = stripVTControlCharacters(str);
for (let i = 0; i < str.length; i++) {
// Try to avoid calling into C++ by first handling the ASCII portion of
// the string. If it is fully ASCII, we skip the C++ part.
const code = str.charCodeAt(i);
if (code < 127) {
width += code >= 32;
continue;
if (code >= 127) {
width += icu.getStringWidth(str.slice(i));
break;
}
width += icu.getStringWidth(
str.slice(i),
Boolean(options.ambiguousAsFullWidth),
Boolean(options.expandEmojiSequence)
);
break;
width += code >= 32 ? 1 : 0;
}
return width;
};
isFullWidthCodePoint =
function isFullWidthCodePoint(code, options) {
if (typeof code !== 'number')
return false;
return icu.getStringWidth(code, options) === 2;
};
} else {
/**
* Returns the number of columns required to display the given string.
*/
getStringWidth = function getStringWidth(str) {
if (NumberIsInteger(str))
return isFullWidthCodePoint(str) ? 2 : 1;

let width = 0;

str = stripVTControlCharacters(String(str));

for (let i = 0; i < str.length; i++) {
const code = str.codePointAt(i);

if (code >= kUTF16SurrogateThreshold) { // Surrogates.
i++;
}
str = stripVTControlCharacters(str);

for (const char of str) {
const code = char.codePointAt(0);
if (isFullWidthCodePoint(code)) {
width += 2;
} else {
} else if (!isZeroWidthCodePoint(code)) {
width++;
}
}
Expand All @@ -109,10 +80,10 @@ if (internalBinding('config').hasIntl) {
* Returns true if the character represented by a given
* Unicode code point is full-width. Otherwise returns false.
*/
isFullWidthCodePoint = function isFullWidthCodePoint(code) {
// Code points are derived from:
const isFullWidthCodePoint = (code) => {
// Code points are partially derived from:
// http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
return NumberIsInteger(code) && code >= 0x1100 && (
return code >= 0x1100 && (
code <= 0x115f || // Hangul Jamo
code === 0x2329 || // LEFT-POINTING ANGLE BRACKET
code === 0x232a || // RIGHT-POINTING ANGLE BRACKET
Expand All @@ -139,10 +110,23 @@ if (internalBinding('config').hasIntl) {
(code >= 0x1b000 && code <= 0x1b001) ||
// Enclosed Ideographic Supplement
(code >= 0x1f200 && code <= 0x1f251) ||
// Miscellaneous Symbols and Pictographs 0x1f300 - 0x1f5ff
// Emoticons 0x1f600 - 0x1f64f
(code >= 0x1f300 && code <= 0x1f64f) ||
// CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
(code >= 0x20000 && code <= 0x3fffd)
);
};

const isZeroWidthCodePoint = (code) => {
return code <= 0x1F || // C0 control codes
(code > 0x7F && code <= 0x9F) || // C1 control codes
(code >= 0x0300 && code <= 0x036F) || // Combining Diacritical Marks
(code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
(code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
(code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
(code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
};
}

/**
Expand Down Expand Up @@ -471,9 +455,7 @@ module.exports = {
commonPrefix,
emitKeys,
getStringWidth,
isFullWidthCodePoint,
kSubstringSearch,
kUTF16SurrogateThreshold,
stripVTControlCharacters,
CSI
};
16 changes: 5 additions & 11 deletions lib/readline.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ const {
CSI,
emitKeys,
getStringWidth,
isFullWidthCodePoint,
kSubstringSearch,
kUTF16SurrogateThreshold,
stripVTControlCharacters
} = require('internal/readline/utils');

Expand Down Expand Up @@ -743,18 +741,14 @@ Interface.prototype._getDisplayPos = function(str) {
const col = this.columns;
let rows = 0;
str = stripVTControlCharacters(str);
for (let i = 0, len = str.length; i < len; i++) {
const code = str.codePointAt(i);
if (code >= kUTF16SurrogateThreshold) { // Surrogates.
i++;
}
if (code === 0x0a) { // new line \n
// rows must be incremented by 1 even if offset = 0 or col = +Infinity
for (const char of str) {
if (char === '\n') {
// Rows must be incremented by 1 even if offset = 0 or col = +Infinity.
rows += MathCeil(offset / col) || 1;
offset = 0;
continue;
}
const width = getStringWidth(code);
const width = getStringWidth(char);
if (width === 0 || width === 1) {
offset += width;
} else { // width === 2
Expand All @@ -781,7 +775,7 @@ Interface.prototype.getCursorPos = function() {
// move the cursor to the beginning of the next line.
if (cols + 1 === columns &&
this.cursor < this.line.length &&
isFullWidthCodePoint(this.line.codePointAt(this.cursor))) {
getStringWidth(this.line[this.cursor]) > 1) {
rows++;
cols = 0;
}
Expand Down
60 changes: 16 additions & 44 deletions test/parallel/test-readline-interface.js
Original file line number Diff line number Diff line change
Expand Up @@ -716,11 +716,7 @@ function isWarned(emitter) {
fi.emit('keypress', '.', { name: 'right' });
cursorPos = rli.getCursorPos();
assert.strictEqual(cursorPos.rows, 0);
if (common.hasIntl) {
assert.strictEqual(cursorPos.cols, 2);
} else {
assert.strictEqual(cursorPos.cols, 1);
}
assert.strictEqual(cursorPos.cols, 2);

rli.on('line', common.mustCall((line) => {
assert.strictEqual(line, '💻');
Expand Down Expand Up @@ -749,14 +745,7 @@ function isWarned(emitter) {
fi.emit('data', '🐕');
cursorPos = rli.getCursorPos();
assert.strictEqual(cursorPos.rows, 0);

if (common.hasIntl) {
assert.strictEqual(cursorPos.cols, 2);
} else {
assert.strictEqual(cursorPos.cols, 1);
// Fix cursor position without internationalization
fi.emit('keypress', '.', { name: 'left' });
}
assert.strictEqual(cursorPos.cols, 2);

rli.on('line', common.mustCall((line) => {
assert.strictEqual(line, '🐕💻');
Expand All @@ -780,22 +769,12 @@ function isWarned(emitter) {
fi.emit('keypress', '.', { name: 'right' });
let cursorPos = rli.getCursorPos();
assert.strictEqual(cursorPos.rows, 0);
if (common.hasIntl) {
assert.strictEqual(cursorPos.cols, 2);
} else {
assert.strictEqual(cursorPos.cols, 1);
// Fix cursor position without internationalization
fi.emit('keypress', '.', { name: 'right' });
}
assert.strictEqual(cursorPos.cols, 2);

fi.emit('data', '🐕');
cursorPos = rli.getCursorPos();
assert.strictEqual(cursorPos.rows, 0);
if (common.hasIntl) {
assert.strictEqual(cursorPos.cols, 4);
} else {
assert.strictEqual(cursorPos.cols, 2);
}
assert.strictEqual(cursorPos.cols, 4);

rli.on('line', common.mustCall((line) => {
assert.strictEqual(line, '💻🐕');
Expand Down Expand Up @@ -957,11 +936,7 @@ function isWarned(emitter) {
fi.emit('data', '💻');
let cursorPos = rli.getCursorPos();
assert.strictEqual(cursorPos.rows, 0);
if (common.hasIntl) {
assert.strictEqual(cursorPos.cols, 2);
} else {
assert.strictEqual(cursorPos.cols, 1);
}
assert.strictEqual(cursorPos.cols, 2);
// Delete left character
fi.emit('keypress', '.', { ctrl: true, name: 'h' });
cursorPos = rli.getCursorPos();
Expand Down Expand Up @@ -1144,27 +1119,24 @@ function isWarned(emitter) {
}
}

// isFullWidthCodePoint() should return false for non-numeric values
[true, false, null, undefined, {}, [], 'あ'].forEach((v) => {
assert.strictEqual(internalReadline.isFullWidthCodePoint('あ'), false);
});

// Wide characters should be treated as two columns.
assert.strictEqual(internalReadline.isFullWidthCodePoint('a'.charCodeAt(0)),
false);
assert.strictEqual(internalReadline.isFullWidthCodePoint('あ'.charCodeAt(0)),
true);
assert.strictEqual(internalReadline.isFullWidthCodePoint('谢'.charCodeAt(0)),
true);
assert.strictEqual(internalReadline.isFullWidthCodePoint('고'.charCodeAt(0)),
true);
assert.strictEqual(internalReadline.isFullWidthCodePoint(0x1f251), true);
assert.strictEqual(internalReadline.getStringWidth('a'), 1);
assert.strictEqual(internalReadline.getStringWidth('あ'), 2);
assert.strictEqual(internalReadline.getStringWidth('谢'), 2);
assert.strictEqual(internalReadline.getStringWidth('고'), 2);
assert.strictEqual(
internalReadline.getStringWidth(String.fromCodePoint(0x1f251)), 2);
assert.strictEqual(internalReadline.getStringWidth('abcde'), 5);
assert.strictEqual(internalReadline.getStringWidth('古池や'), 6);
assert.strictEqual(internalReadline.getStringWidth('ノード.js'), 9);
assert.strictEqual(internalReadline.getStringWidth('你好'), 4);
assert.strictEqual(internalReadline.getStringWidth('안녕하세요'), 10);
assert.strictEqual(internalReadline.getStringWidth('A\ud83c\ude00BC'), 5);
assert.strictEqual(internalReadline.getStringWidth('👨‍👩‍👦‍👦'), 8);
assert.strictEqual(internalReadline.getStringWidth('🐕𐐷あ💻😀'), 9);
// TODO(BridgeAR): This should have a width of 4.
assert.strictEqual(internalReadline.getStringWidth('⓬⓪'), 2);
assert.strictEqual(internalReadline.getStringWidth('\u0301\u200D\u200E'), 0);

// Check if vt control chars are stripped
assert.strictEqual(
Expand Down

0 comments on commit 7163b1e

Please sign in to comment.