Skip to content

Commit 7ba21d0

Browse files
BridgeARMylesBorins
authored andcommitted
readline: improve getStringWidth()
1. Simplify the getStringWidth function used by Intl builds by removing dead code (the options were unused) and by refactoring the logic. 2. Improve the getStringWidth unicode handling used by non-Intl builds. The getStringWidth function returned the wrong width for multiple inputs. It's now improved by supporting various zero width characters and more full width characters. PR-URL: #31112 Reviewed-By: MichaΓ«l Zasso <targos@protonmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
1 parent 2e54a99 commit 7ba21d0

File tree

3 files changed

+49
-101
lines changed

3 files changed

+49
-101
lines changed

β€Žlib/internal/readline/utils.js

Lines changed: 28 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
'use strict';
22

33
const {
4-
Boolean,
5-
NumberIsInteger,
64
RegExp,
75
Symbol,
86
} = primordials;
@@ -21,7 +19,6 @@ const kEscape = '\x1b';
2119
const kSubstringSearch = Symbol('kSubstringSearch');
2220

2321
let getStringWidth;
24-
let isFullWidthCodePoint;
2522

2623
function CSI(strings, ...args) {
2724
let ret = `${kEscape}[`;
@@ -41,63 +38,37 @@ CSI.kClearScreenDown = CSI`0J`;
4138

4239
if (internalBinding('config').hasIntl) {
4340
const icu = internalBinding('icu');
44-
getStringWidth = function getStringWidth(str, options) {
45-
options = options || {};
46-
if (NumberIsInteger(str)) {
47-
// Provide information about the character with code point 'str'.
48-
return icu.getStringWidth(
49-
str,
50-
Boolean(options.ambiguousAsFullWidth),
51-
false
52-
);
53-
}
54-
str = stripVTControlCharacters(String(str));
41+
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
42+
// Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true;
43+
getStringWidth = function getStringWidth(str) {
5544
let width = 0;
45+
str = stripVTControlCharacters(str);
5646
for (let i = 0; i < str.length; i++) {
5747
// Try to avoid calling into C++ by first handling the ASCII portion of
5848
// the string. If it is fully ASCII, we skip the C++ part.
5949
const code = str.charCodeAt(i);
60-
if (code < 127) {
61-
width += code >= 32;
62-
continue;
50+
if (code >= 127) {
51+
width += icu.getStringWidth(str.slice(i));
52+
break;
6353
}
64-
width += icu.getStringWidth(
65-
str.slice(i),
66-
Boolean(options.ambiguousAsFullWidth),
67-
Boolean(options.expandEmojiSequence)
68-
);
69-
break;
54+
width += code >= 32 ? 1 : 0;
7055
}
7156
return width;
7257
};
73-
isFullWidthCodePoint =
74-
function isFullWidthCodePoint(code, options) {
75-
if (typeof code !== 'number')
76-
return false;
77-
return icu.getStringWidth(code, options) === 2;
78-
};
7958
} else {
8059
/**
8160
* Returns the number of columns required to display the given string.
8261
*/
8362
getStringWidth = function getStringWidth(str) {
84-
if (NumberIsInteger(str))
85-
return isFullWidthCodePoint(str) ? 2 : 1;
86-
8763
let width = 0;
8864

89-
str = stripVTControlCharacters(String(str));
90-
91-
for (let i = 0; i < str.length; i++) {
92-
const code = str.codePointAt(i);
93-
94-
if (code >= kUTF16SurrogateThreshold) { // Surrogates.
95-
i++;
96-
}
65+
str = stripVTControlCharacters(str);
9766

67+
for (const char of str) {
68+
const code = char.codePointAt(0);
9869
if (isFullWidthCodePoint(code)) {
9970
width += 2;
100-
} else {
71+
} else if (!isZeroWidthCodePoint(code)) {
10172
width++;
10273
}
10374
}
@@ -109,10 +80,10 @@ if (internalBinding('config').hasIntl) {
10980
* Returns true if the character represented by a given
11081
* Unicode code point is full-width. Otherwise returns false.
11182
*/
112-
isFullWidthCodePoint = function isFullWidthCodePoint(code) {
113-
// Code points are derived from:
83+
const isFullWidthCodePoint = (code) => {
84+
// Code points are partially derived from:
11485
// http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
115-
return NumberIsInteger(code) && code >= 0x1100 && (
86+
return code >= 0x1100 && (
11687
code <= 0x115f || // Hangul Jamo
11788
code === 0x2329 || // LEFT-POINTING ANGLE BRACKET
11889
code === 0x232a || // RIGHT-POINTING ANGLE BRACKET
@@ -139,10 +110,23 @@ if (internalBinding('config').hasIntl) {
139110
(code >= 0x1b000 && code <= 0x1b001) ||
140111
// Enclosed Ideographic Supplement
141112
(code >= 0x1f200 && code <= 0x1f251) ||
113+
// Miscellaneous Symbols and Pictographs 0x1f300 - 0x1f5ff
114+
// Emoticons 0x1f600 - 0x1f64f
115+
(code >= 0x1f300 && code <= 0x1f64f) ||
142116
// CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
143117
(code >= 0x20000 && code <= 0x3fffd)
144118
);
145119
};
120+
121+
const isZeroWidthCodePoint = (code) => {
122+
return code <= 0x1F || // C0 control codes
123+
(code > 0x7F && code <= 0x9F) || // C1 control codes
124+
(code >= 0x0300 && code <= 0x036F) || // Combining Diacritical Marks
125+
(code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
126+
(code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
127+
(code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
128+
(code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
129+
};
146130
}
147131

148132
/**
@@ -471,9 +455,7 @@ module.exports = {
471455
commonPrefix,
472456
emitKeys,
473457
getStringWidth,
474-
isFullWidthCodePoint,
475458
kSubstringSearch,
476-
kUTF16SurrogateThreshold,
477459
stripVTControlCharacters,
478460
CSI
479461
};

β€Žlib/readline.js

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,7 @@ const {
5353
CSI,
5454
emitKeys,
5555
getStringWidth,
56-
isFullWidthCodePoint,
5756
kSubstringSearch,
58-
kUTF16SurrogateThreshold,
5957
stripVTControlCharacters
6058
} = require('internal/readline/utils');
6159

@@ -743,18 +741,14 @@ Interface.prototype._getDisplayPos = function(str) {
743741
const col = this.columns;
744742
let rows = 0;
745743
str = stripVTControlCharacters(str);
746-
for (let i = 0, len = str.length; i < len; i++) {
747-
const code = str.codePointAt(i);
748-
if (code >= kUTF16SurrogateThreshold) { // Surrogates.
749-
i++;
750-
}
751-
if (code === 0x0a) { // new line \n
752-
// rows must be incremented by 1 even if offset = 0 or col = +Infinity
744+
for (const char of str) {
745+
if (char === '\n') {
746+
// Rows must be incremented by 1 even if offset = 0 or col = +Infinity.
753747
rows += MathCeil(offset / col) || 1;
754748
offset = 0;
755749
continue;
756750
}
757-
const width = getStringWidth(code);
751+
const width = getStringWidth(char);
758752
if (width === 0 || width === 1) {
759753
offset += width;
760754
} else { // width === 2
@@ -781,7 +775,7 @@ Interface.prototype.getCursorPos = function() {
781775
// move the cursor to the beginning of the next line.
782776
if (cols + 1 === columns &&
783777
this.cursor < this.line.length &&
784-
isFullWidthCodePoint(this.line.codePointAt(this.cursor))) {
778+
getStringWidth(this.line[this.cursor]) > 1) {
785779
rows++;
786780
cols = 0;
787781
}

β€Žtest/parallel/test-readline-interface.js

Lines changed: 16 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -731,11 +731,7 @@ function isWarned(emitter) {
731731
fi.emit('keypress', '.', { name: 'right' });
732732
cursorPos = rli.getCursorPos();
733733
assert.strictEqual(cursorPos.rows, 0);
734-
if (common.hasIntl) {
735-
assert.strictEqual(cursorPos.cols, 2);
736-
} else {
737-
assert.strictEqual(cursorPos.cols, 1);
738-
}
734+
assert.strictEqual(cursorPos.cols, 2);
739735

740736
rli.on('line', common.mustCall((line) => {
741737
assert.strictEqual(line, 'πŸ’»');
@@ -764,14 +760,7 @@ function isWarned(emitter) {
764760
fi.emit('data', 'πŸ•');
765761
cursorPos = rli.getCursorPos();
766762
assert.strictEqual(cursorPos.rows, 0);
767-
768-
if (common.hasIntl) {
769-
assert.strictEqual(cursorPos.cols, 2);
770-
} else {
771-
assert.strictEqual(cursorPos.cols, 1);
772-
// Fix cursor position without internationalization
773-
fi.emit('keypress', '.', { name: 'left' });
774-
}
763+
assert.strictEqual(cursorPos.cols, 2);
775764

776765
rli.on('line', common.mustCall((line) => {
777766
assert.strictEqual(line, 'πŸ•πŸ’»');
@@ -795,22 +784,12 @@ function isWarned(emitter) {
795784
fi.emit('keypress', '.', { name: 'right' });
796785
let cursorPos = rli.getCursorPos();
797786
assert.strictEqual(cursorPos.rows, 0);
798-
if (common.hasIntl) {
799-
assert.strictEqual(cursorPos.cols, 2);
800-
} else {
801-
assert.strictEqual(cursorPos.cols, 1);
802-
// Fix cursor position without internationalization
803-
fi.emit('keypress', '.', { name: 'right' });
804-
}
787+
assert.strictEqual(cursorPos.cols, 2);
805788

806789
fi.emit('data', 'πŸ•');
807790
cursorPos = rli.getCursorPos();
808791
assert.strictEqual(cursorPos.rows, 0);
809-
if (common.hasIntl) {
810-
assert.strictEqual(cursorPos.cols, 4);
811-
} else {
812-
assert.strictEqual(cursorPos.cols, 2);
813-
}
792+
assert.strictEqual(cursorPos.cols, 4);
814793

815794
rli.on('line', common.mustCall((line) => {
816795
assert.strictEqual(line, 'πŸ’»πŸ•');
@@ -972,11 +951,7 @@ function isWarned(emitter) {
972951
fi.emit('data', 'πŸ’»');
973952
let cursorPos = rli.getCursorPos();
974953
assert.strictEqual(cursorPos.rows, 0);
975-
if (common.hasIntl) {
976-
assert.strictEqual(cursorPos.cols, 2);
977-
} else {
978-
assert.strictEqual(cursorPos.cols, 1);
979-
}
954+
assert.strictEqual(cursorPos.cols, 2);
980955
// Delete left character
981956
fi.emit('keypress', '.', { ctrl: true, name: 'h' });
982957
cursorPos = rli.getCursorPos();
@@ -1159,27 +1134,24 @@ function isWarned(emitter) {
11591134
}
11601135
}
11611136

1162-
// isFullWidthCodePoint() should return false for non-numeric values
1163-
[true, false, null, undefined, {}, [], 'あ'].forEach((v) => {
1164-
assert.strictEqual(internalReadline.isFullWidthCodePoint('あ'), false);
1165-
});
1166-
11671137
// Wide characters should be treated as two columns.
1168-
assert.strictEqual(internalReadline.isFullWidthCodePoint('a'.charCodeAt(0)),
1169-
false);
1170-
assert.strictEqual(internalReadline.isFullWidthCodePoint('あ'.charCodeAt(0)),
1171-
true);
1172-
assert.strictEqual(internalReadline.isFullWidthCodePoint('θ°’'.charCodeAt(0)),
1173-
true);
1174-
assert.strictEqual(internalReadline.isFullWidthCodePoint('κ³ '.charCodeAt(0)),
1175-
true);
1176-
assert.strictEqual(internalReadline.isFullWidthCodePoint(0x1f251), true);
1138+
assert.strictEqual(internalReadline.getStringWidth('a'), 1);
1139+
assert.strictEqual(internalReadline.getStringWidth('あ'), 2);
1140+
assert.strictEqual(internalReadline.getStringWidth('θ°’'), 2);
1141+
assert.strictEqual(internalReadline.getStringWidth('κ³ '), 2);
1142+
assert.strictEqual(
1143+
internalReadline.getStringWidth(String.fromCodePoint(0x1f251)), 2);
11771144
assert.strictEqual(internalReadline.getStringWidth('abcde'), 5);
11781145
assert.strictEqual(internalReadline.getStringWidth('叀池や'), 6);
11791146
assert.strictEqual(internalReadline.getStringWidth('γƒŽγƒΌγƒ‰.js'), 9);
11801147
assert.strictEqual(internalReadline.getStringWidth('δ½ ε₯½'), 4);
11811148
assert.strictEqual(internalReadline.getStringWidth('μ•ˆλ…•ν•˜μ„Έμš”'), 10);
11821149
assert.strictEqual(internalReadline.getStringWidth('A\ud83c\ude00BC'), 5);
1150+
assert.strictEqual(internalReadline.getStringWidth('πŸ‘¨β€πŸ‘©β€πŸ‘¦β€πŸ‘¦'), 8);
1151+
assert.strictEqual(internalReadline.getStringWidth('πŸ•π·γ‚πŸ’»πŸ˜€'), 9);
1152+
// TODO(BridgeAR): This should have a width of 4.
1153+
assert.strictEqual(internalReadline.getStringWidth('⓬β“ͺ'), 2);
1154+
assert.strictEqual(internalReadline.getStringWidth('\u0301\u200D\u200E'), 0);
11831155

11841156
// Check if vt control chars are stripped
11851157
assert.strictEqual(

0 commit comments

Comments
Β (0)