Skip to content

Commit f3fb6a1

Browse files
BridgeARtargos
authored andcommitted
src: change GetStringWidth's expand_emoji_sequence option default
The option is now set to true by default. Most terminals do not have full emoji support and visualize emojis with zero width joiners as individual emojis. Also verify that at least one argument is always passed through to the function and remove support for passing through code points. Only accept strings from now on to simplify the API. PR-URL: #31112 Reviewed-By: Michaël Zasso <targos@protonmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
1 parent 4f6300f commit f3fb6a1

File tree

3 files changed

+43
-51
lines changed

3 files changed

+43
-51
lines changed

lib/internal/readline/utils.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ if (internalBinding('config').hasIntl) {
6363
const icu = internalBinding('icu');
6464
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
6565
// Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true;
66+
// TODO(BridgeAR): Expose the options to the user. That is probably the
67+
// best thing possible at the moment, since it's difficult to know what
68+
// the receiving end supports.
6669
getStringWidth = function getStringWidth(str) {
6770
let width = 0;
6871
str = stripVTControlCharacters(str);

src/node_i18n.cc

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -767,18 +767,10 @@ static int GetColumnWidth(UChar32 codepoint,
767767
// Returns the column width for the given String.
768768
static void GetStringWidth(const FunctionCallbackInfo<Value>& args) {
769769
Environment* env = Environment::GetCurrent(args);
770-
if (args.Length() < 1)
771-
return;
770+
CHECK(args[0]->IsString());
772771

773772
bool ambiguous_as_full_width = args[1]->IsTrue();
774-
bool expand_emoji_sequence = args[2]->IsTrue();
775-
776-
if (args[0]->IsNumber()) {
777-
uint32_t val;
778-
if (!args[0]->Uint32Value(env->context()).To(&val)) return;
779-
args.GetReturnValue().Set(GetColumnWidth(val, ambiguous_as_full_width));
780-
return;
781-
}
773+
bool expand_emoji_sequence = !args[2]->IsBoolean() || args[2]->IsTrue();
782774

783775
TwoByteValue value(env->isolate(), args[0]);
784776
// reinterpret_cast is required by windows to compile
@@ -803,6 +795,7 @@ static void GetStringWidth(const FunctionCallbackInfo<Value>& args) {
803795
// in advance if a particular sequence is going to be supported.
804796
// The expand_emoji_sequence option allows the caller to skip this
805797
// check and count each code within an emoji sequence separately.
798+
// https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
806799
if (!expand_emoji_sequence &&
807800
n > 0 && p == 0x200d && // 0x200d == ZWJ (zero width joiner)
808801
(u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) ||

test/parallel/test-icu-stringwidth.js

Lines changed: 37 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,82 +12,78 @@ const readline = require('internal/readline/utils');
1212

1313
// Ll (Lowercase Letter): LATIN SMALL LETTER A
1414
assert.strictEqual(readline.getStringWidth('a'), 1);
15-
assert.strictEqual(readline.getStringWidth(0x0061), 1);
15+
assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0061)), 1);
1616
// Lo (Other Letter)
1717
assert.strictEqual(readline.getStringWidth('丁'), 2);
18-
assert.strictEqual(readline.getStringWidth(0x4E01), 2);
18+
assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x4E01)), 2);
1919
// Surrogate pairs
20-
assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 2);
20+
assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 4);
2121
assert.strictEqual(readline.getStringWidth('👅'), 2);
2222
// Cs (Surrogate): High Surrogate
2323
assert.strictEqual(readline.getStringWidth('\ud83d'), 1);
2424
// Cs (Surrogate): Low Surrogate
2525
assert.strictEqual(readline.getStringWidth('\udc78'), 1);
2626
// Cc (Control): NULL
27-
assert.strictEqual(readline.getStringWidth(0), 0);
27+
assert.strictEqual(readline.getStringWidth('\u0000'), 0);
2828
// Cc (Control): BELL
29-
assert.strictEqual(readline.getStringWidth(0x0007), 0);
29+
assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0007)), 0);
3030
// Cc (Control): LINE FEED
3131
assert.strictEqual(readline.getStringWidth('\n'), 0);
3232
// Cf (Format): SOFT HYPHEN
33-
assert.strictEqual(readline.getStringWidth(0x00AD), 1);
33+
assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x00AD)), 1);
3434
// Cf (Format): LEFT-TO-RIGHT MARK
3535
// Cf (Format): RIGHT-TO-LEFT MARK
3636
assert.strictEqual(readline.getStringWidth('\u200Ef\u200F'), 1);
3737
// Cn (Unassigned): Not a character
38-
assert.strictEqual(readline.getStringWidth(0x10FFEF), 1);
38+
assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x10FFEF)), 1);
3939
// Cn (Unassigned): Not a character (but in a CJK range)
40-
assert.strictEqual(readline.getStringWidth(0x3FFEF), 2);
40+
assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x3FFEF)), 1);
4141
// Mn (Nonspacing Mark): COMBINING ACUTE ACCENT
42-
assert.strictEqual(readline.getStringWidth(0x0301), 0);
42+
assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x0301)), 0);
4343
// Mc (Spacing Mark): BALINESE ADEG ADEG
4444
// Chosen as its Canonical_Combining_Class is not 0, but is not a 0-width
4545
// character.
46-
assert.strictEqual(readline.getStringWidth(0x1B44), 1);
46+
assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x1B44)), 1);
4747
// Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE
48-
assert.strictEqual(readline.getStringWidth(0x20DD), 0);
48+
assert.strictEqual(readline.getStringWidth(String.fromCharCode(0x20DD)), 0);
4949

50-
// The following is an emoji sequence. In some implementations, it is
51-
// represented as a single glyph, in other implementations as a sequence
52-
// of individual glyphs. By default, the algorithm will assume the single
53-
// glyph interpretation and return a value of 2. By passing the
54-
// expandEmojiSequence: true option, each component will be counted
55-
// individually.
56-
assert.strictEqual(readline.getStringWidth('👩‍👩‍👧‍👧'), 2);
57-
assert.strictEqual(
58-
readline.getStringWidth('👩‍👩‍👧‍👧', { expandEmojiSequence: true }), 8);
50+
// The following is an emoji sequence with ZWJ (zero-width-joiner). In some
51+
// implementations, it is represented as a single glyph, in other
52+
// implementations as a sequence of individual glyphs. By default, each
53+
// component will be counted individually, since not a lot of systems support
54+
// these fully.
55+
// See https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
56+
assert.strictEqual(readline.getStringWidth('👩‍👩‍👧‍👧'), 8);
57+
// TODO(BridgeAR): This should have a width of two and six. The heart contains
58+
// the \uFE0F variation selector that indicates that it should be displayed as
59+
// emoji instead of as text. Emojis are all full width characters when not being
60+
// rendered as text.
61+
// https://en.wikipedia.org/wiki/Variation_Selectors_(Unicode_block)
62+
assert.strictEqual(readline.getStringWidth('❤️'), 1);
63+
assert.strictEqual(readline.getStringWidth('👩‍❤️‍👩'), 5);
64+
// The length of one is correct. It is an emoji treated as text.
65+
assert.strictEqual(readline.getStringWidth('❤'), 1);
5966

6067
// By default, unicode characters whose width is considered ambiguous will
6168
// be considered half-width. For these characters, getStringWidth will return
6269
// 1. In some contexts, however, it is more appropriate to consider them full
63-
// width. By default, the algorithm will assume half width. By passing
64-
// the ambiguousAsFullWidth: true option, ambiguous characters will be counted
65-
// as 2 columns.
70+
// width. By default, the algorithm will assume half width.
6671
assert.strictEqual(readline.getStringWidth('\u01d4'), 1);
67-
assert.strictEqual(
68-
readline.getStringWidth('\u01d4', { ambiguousAsFullWidth: true }), 2);
6972

7073
// Control chars and combining chars are zero
7174
assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1);
7275

7376
// Test that the fast path for ASCII characters yields results consistent
7477
// with the 'slow' path.
75-
for (const ambiguousAsFullWidth of [ false, true ]) {
76-
for (let i = 0; i < 256; i++) {
77-
const char = String.fromCharCode(i);
78-
assert.strictEqual(
79-
readline.getStringWidth(i, { ambiguousAsFullWidth }),
80-
readline.getStringWidth(char, { ambiguousAsFullWidth }));
81-
assert.strictEqual(
82-
readline.getStringWidth(char + '🎉', { ambiguousAsFullWidth }),
83-
readline.getStringWidth(char, { ambiguousAsFullWidth }) + 2);
78+
for (let i = 0; i < 256; i++) {
79+
const char = String.fromCharCode(i);
80+
assert.strictEqual(
81+
readline.getStringWidth(char + '🎉'),
82+
readline.getStringWidth(char) + 2);
8483

85-
if (i < 32 || (i >= 127 && i < 160)) { // Control character
86-
assert.strictEqual(
87-
readline.getStringWidth(i, { ambiguousAsFullWidth }), 0);
88-
} else if (i < 127) { // Regular ASCII character
89-
assert.strictEqual(
90-
readline.getStringWidth(i, { ambiguousAsFullWidth }), 1);
91-
}
84+
if (i < 32 || (i >= 127 && i < 160)) { // Control character
85+
assert.strictEqual(readline.getStringWidth(char), 0);
86+
} else { // Regular ASCII character
87+
assert.strictEqual(readline.getStringWidth(char), 1);
9288
}
9389
}

0 commit comments

Comments
 (0)