@@ -192,6 +192,17 @@ const meta = [
192192 '\\x98' , '\\x99' , '\\x9A' , '\\x9B' , '\\x9C' , '\\x9D' , '\\x9E' , '\\x9F' , // x9F
193193] ;
194194
// Pattern that matches every ANSI escape code sequence in a string. It is
// used to split / strip escape codes before measuring display width.
// Adopted from https://github.com/chalk/ansi-regex/blob/master/index.js
// License: MIT, authors: @sindresorhus, Qix-, arjunmehta and LitoMore
const ansiPattern = [
  // Introducer (ESC or CSI) followed by any intermediate characters.
  '[\\u001B\\u009B][[\\]()#;?]*',
  // Alternative 1: a parameter string terminated by BEL (\u0007).
  '(?:(?:(?:[a-zA-Z\\d]*(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?\\u0007)',
  // Alternative 2: numeric parameters followed by a final command character.
  '|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~]))',
].join('');
const ansi = new RegExp(ansiPattern, 'g');

// Assigned further down, once it is known whether ICU support is available.
let getStringWidth;
205+
195206function getUserOptions ( ctx ) {
196207 return {
197208 stylize : ctx . stylize ,
@@ -1154,7 +1165,7 @@ function groupArrayElements(ctx, output, value) {
11541165 // entries length of all output entries. We have to remove colors first,
11551166 // otherwise the length would not be calculated properly.
11561167 for ( ; i < outputLength ; i ++ ) {
1157- const len = ctx . colors ? removeColors ( output [ i ] ) . length : output [ i ] . length ;
1168+ const len = getStringWidth ( output [ i ] , ctx . colors ) ;
11581169 dataLen [ i ] = len ;
11591170 totalLength += len + separatorSpace ;
11601171 if ( maxLength < len )
@@ -1197,8 +1208,6 @@ function groupArrayElements(ctx, output, value) {
11971208 if ( columns <= 1 ) {
11981209 return output ;
11991210 }
1200- // TODO(BridgeAR): Add unicode support. Use the readline getStringWidth
1201- // function.
12021211 const tmp = [ ] ;
12031212 const maxLineLength = [ ] ;
12041213 for ( let i = 0 ; i < columns ; i ++ ) {
@@ -1565,11 +1574,8 @@ function formatProperty(ctx, value, recurseTimes, key, type, desc) {
15651574 const diff = ( ctx . compact !== true || type !== kObjectType ) ? 2 : 3 ;
15661575 ctx . indentationLvl += diff ;
15671576 str = formatValue ( ctx , desc . value , recurseTimes ) ;
1568- if ( diff === 3 ) {
1569- const len = ctx . colors ? removeColors ( str ) . length : str . length ;
1570- if ( ctx . breakLength < len ) {
1571- extra = `\n${ ' ' . repeat ( ctx . indentationLvl ) } ` ;
1572- }
1577+ if ( diff === 3 && ctx . breakLength < getStringWidth ( str , ctx . colors ) ) {
1578+ extra = `\n${ ' ' . repeat ( ctx . indentationLvl ) } ` ;
15731579 }
15741580 ctx . indentationLvl -= diff ;
15751581 } else if ( desc . get !== undefined ) {
@@ -1889,9 +1895,116 @@ function formatWithOptionsInternal(inspectOptions, ...args) {
18891895 return str ;
18901896}
18911897
if (internalBinding('config').hasIntl) {
  const icu = internalBinding('icu');
  // icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
  // Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true;
  // TODO(BridgeAR): Expose the options to the user. That is probably the
  // best thing possible at the moment, since it's difficult to know what
  // the receiving end supports.
  /**
   * Returns the number of columns required to display the given string.
   * The leading ASCII portion is measured in JS; the remainder (starting at
   * the first code unit >= 127) is handed to ICU.
   * @param {string} str
   * @param {boolean} [removeControlChars=true] Strip ANSI escape sequences
   *   before measuring.
   * @returns {number}
   */
  getStringWidth = function getStringWidth(str, removeControlChars = true) {
    let width = 0;
    if (removeControlChars)
      str = stripVTControlCharacters(str);
    for (let i = 0; i < str.length; i++) {
      // Try to avoid calling into C++ by first handling the ASCII portion of
      // the string. If it is fully ASCII, we skip the C++ part.
      const code = str.charCodeAt(i);
      if (code >= 127) {
        width += icu.getStringWidth(str.slice(i));
        break;
      }
      // Code units below 32 are control characters and take no column.
      width += code >= 32 ? 1 : 0;
    }
    return width;
  };
} else {
  /**
   * Returns the number of columns required to display the given string.
   * Fallback used when ICU is not available: every code point is classified
   * as full-width (2 columns), zero-width (0 columns) or regular (1 column).
   * @param {string} str
   * @param {boolean} [removeControlChars=true] Strip ANSI escape sequences
   *   before measuring.
   * @returns {number}
   */
  getStringWidth = function getStringWidth(str, removeControlChars = true) {
    let width = 0;

    // Strip escape codes only when requested, matching the ICU-backed
    // implementation. (The condition used to be inverted here.)
    if (removeControlChars)
      str = stripVTControlCharacters(str);

    // Iterate by code point so that surrogate pairs are classified as one
    // character.
    for (const char of str) {
      const code = char.codePointAt(0);
      if (isFullWidthCodePoint(code)) {
        width += 2;
      } else if (!isZeroWidthCodePoint(code)) {
        width++;
      }
    }

    return width;
  };

  /**
   * Returns true if the character represented by a given
   * Unicode code point is full-width. Otherwise returns false.
   * @param {number} code
   * @returns {boolean}
   */
  const isFullWidthCodePoint = (code) => {
    // Code points are partially derived from:
    // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
    return code >= 0x1100 && (
      code <= 0x115f ||  // Hangul Jamo
      code === 0x2329 || // LEFT-POINTING ANGLE BRACKET
      code === 0x232a || // RIGHT-POINTING ANGLE BRACKET
      // CJK Radicals Supplement .. Enclosed CJK Letters and Months
      (code >= 0x2e80 && code <= 0x3247 && code !== 0x303f) ||
      // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
      (code >= 0x3250 && code <= 0x4dbf) ||
      // CJK Unified Ideographs .. Yi Radicals
      (code >= 0x4e00 && code <= 0xa4c6) ||
      // Hangul Jamo Extended-A
      (code >= 0xa960 && code <= 0xa97c) ||
      // Hangul Syllables
      (code >= 0xac00 && code <= 0xd7a3) ||
      // CJK Compatibility Ideographs
      (code >= 0xf900 && code <= 0xfaff) ||
      // Vertical Forms
      (code >= 0xfe10 && code <= 0xfe19) ||
      // CJK Compatibility Forms .. Small Form Variants
      (code >= 0xfe30 && code <= 0xfe6b) ||
      // Halfwidth and Fullwidth Forms
      (code >= 0xff01 && code <= 0xff60) ||
      (code >= 0xffe0 && code <= 0xffe6) ||
      // Kana Supplement
      (code >= 0x1b000 && code <= 0x1b001) ||
      // Enclosed Ideographic Supplement
      (code >= 0x1f200 && code <= 0x1f251) ||
      // Miscellaneous Symbols and Pictographs 0x1f300 - 0x1f5ff
      // Emoticons 0x1f600 - 0x1f64f
      (code >= 0x1f300 && code <= 0x1f64f) ||
      // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
      (code >= 0x20000 && code <= 0x3fffd)
    );
  };

  /**
   * Returns true if the given Unicode code point occupies no column when
   * displayed (control codes, combining marks, variation selectors, ...).
   * @param {number} code
   * @returns {boolean}
   */
  const isZeroWidthCodePoint = (code) => {
    return code <= 0x1F || // C0 control codes
      (code > 0x7F && code <= 0x9F) || // C1 control codes
      (code >= 0x0300 && code <= 0x036F) || // Combining Diacritical Marks
      (code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
      (code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
      (code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
      (code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
  };
}
1995+
/**
 * Strips every ANSI / VT escape sequence matched by the module-level `ansi`
 * regular expression. Useful for estimating the displayed width of a string.
 * @param {string} str
 * @returns {string} `str` with all matched escape sequences removed.
 */
function stripVTControlCharacters(str) {
  const withoutEscapes = str.replace(ansi, '');
  return withoutEscapes;
}
2002+
18922003module . exports = {
18932004 inspect,
18942005 format,
18952006 formatWithOptions,
1896- inspectDefaultOptions
2007+ getStringWidth,
2008+ inspectDefaultOptions,
2009+ stripVTControlCharacters
18972010} ;
0 commit comments