Skip to content

Commit 43a826d

Browse files
committed
Fix narrow width: Added StrictEmojiNarrow
1 parent c9bd7d1 commit 43a826d

File tree

3 files changed

+179
-80
lines changed

3 files changed

+179
-80
lines changed

benchmark_test.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,78 @@ import (
77

88
var benchSink int
99

10+
//
11+
// RuneWidth
12+
//
13+
14+
func benchRuneWidth(b *testing.B, eastAsianWidth bool, start, stop rune, want int) int {
15+
n := 0
16+
got := -1
17+
c := NewCondition()
18+
c.EastAsianWidth = eastAsianWidth
19+
for i := 0; i < b.N; i++ {
20+
got = n
21+
for r := start; r < stop; r++ {
22+
n += c.RuneWidth(r)
23+
}
24+
got = n - got
25+
}
26+
if want != 0 && got != want { // some extra checks
27+
b.Errorf("got %d, want %d\n", got, want)
28+
}
29+
return n
30+
}
31+
func BenchmarkRuneWidthAll(b *testing.B) {
32+
benchSink = benchRuneWidth(b, false, 0, utf8.MaxRune+1, 1293932)
33+
}
34+
func BenchmarkRuneWidth768(b *testing.B) {
35+
benchSink = benchRuneWidth(b, false, 0, 0x300, 702)
36+
}
37+
func BenchmarkRuneWidthAllEastAsian(b *testing.B) {
38+
benchSink = benchRuneWidth(b, true, 0, utf8.MaxRune+1, 1432558)
39+
}
40+
func BenchmarkRuneWidth768EastAsian(b *testing.B) {
41+
benchSink = benchRuneWidth(b, true, 0, 0x300, 794)
42+
}
43+
44+
//
45+
// String1Width - strings which consist of a single rune
46+
//
47+
48+
func benchString1Width(b *testing.B, eastAsianWidth bool, start, stop rune, want int) int {
49+
n := 0
50+
got := -1
51+
c := NewCondition()
52+
c.EastAsianWidth = eastAsianWidth
53+
for i := 0; i < b.N; i++ {
54+
got = n
55+
for r := start; r < stop; r++ {
56+
s := string(r)
57+
n += c.StringWidth(s)
58+
}
59+
got = n - got
60+
}
61+
if want != 0 && got != want { // some extra checks
62+
b.Errorf("got %d, want %d\n", got, want)
63+
}
64+
return n
65+
}
66+
func BenchmarkString1WidthAll(b *testing.B) {
67+
benchSink = benchString1Width(b, false, 0, utf8.MaxRune+1, 1295980)
68+
}
69+
func BenchmarkString1Width768(b *testing.B) {
70+
benchSink = benchString1Width(b, false, 0, 0x300, 702)
71+
}
72+
func BenchmarkString1WidthAllEastAsian(b *testing.B) {
73+
benchSink = benchString1Width(b, true, 0, utf8.MaxRune+1, 1436654)
74+
}
75+
func BenchmarkString1Width768EastAsian(b *testing.B) {
76+
benchSink = benchString1Width(b, true, 0, 0x300, 794)
77+
}
78+
79+
//
80+
// tables
81+
//
1082
func benchTable(b *testing.B, tbl table) int {
1183
n := 0
1284
for i := 0; i < b.N; i++ {

runewidth.go

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,14 @@ var (
1212
// EastAsianWidth will be set true if the current locale is CJK
1313
EastAsianWidth bool
1414

15+
// StrictEmojiNeutral should be set to false to handle broken fonts
16+
StrictEmojiNeutral bool = true
17+
1518
// DefaultCondition is a condition in current locale
16-
DefaultCondition = &Condition{}
19+
DefaultCondition = &Condition{
20+
EastAsianWidth: false,
21+
StrictEmojiNeutral: true,
22+
}
1723
)
1824

1925
func init() {
@@ -83,28 +89,52 @@ var nonprint = table{
8389

8490
// Condition holds the EastAsianWidth flag, which tells whether the current locale is CJK or not, and the StrictEmojiNeutral flag.
8591
type Condition struct {
	// EastAsianWidth selects which of the two RuneWidth code paths is used.
	EastAsianWidth bool

	// StrictEmojiNeutral is consulted by RuneWidth only on the East Asian
	// path: when it is false, runes found in the emoji table are reported
	// with width 2.
	StrictEmojiNeutral bool
}
8895

8996
// NewCondition returns a new Condition initialized from the package-level EastAsianWidth and StrictEmojiNeutral settings.
9097
func NewCondition() *Condition {
9198
return &Condition{
92-
EastAsianWidth: EastAsianWidth,
99+
EastAsianWidth: EastAsianWidth,
100+
StrictEmojiNeutral: StrictEmojiNeutral,
93101
}
94102
}
95103

96104
// RuneWidth returns the number of cells in r.
97105
// See http://www.unicode.org/reports/tr11/
98106
func (c *Condition) RuneWidth(r rune) int {
99-
switch {
100-
case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining):
101-
return 0
102-
case inTables(r, narrow):
103-
return 1
104-
case (c.EastAsianWidth && IsAmbiguousWidth(r)) || inTables(r, doublewidth, neutral):
105-
return 2
106-
default:
107-
return 1
107+
// optimized version, verified by TestRuneWidthChecksums()
108+
if !c.EastAsianWidth {
109+
switch {
110+
case r < 0x20 || r > 0x10FFFF:
111+
return 0
112+
case (r >= 0x7F && r <= 0x9F) || r == 0xAD: // nonprint
113+
return 0
114+
case r < 0x300:
115+
return 1
116+
case inTable(r, narrow):
117+
return 1
118+
case inTables(r, nonprint, combining):
119+
return 0
120+
case inTable(r, doublewidth):
121+
return 2
122+
default:
123+
return 1
124+
}
125+
} else {
126+
switch {
127+
case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining):
128+
return 0
129+
case inTable(r, narrow):
130+
return 1
131+
case inTables(r, ambiguous, doublewidth):
132+
return 2
133+
case !c.StrictEmojiNeutral && inTables(r, ambiguous, emoji, narrow):
134+
return 2
135+
default:
136+
return 1
137+
}
108138
}
109139
}
110140

runewidth_test.go

Lines changed: 65 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,31 @@ func TestTableChecksums(t *testing.T) {
6565
}
6666
}
6767

68+
func TestRuneWidthChecksums(t *testing.T) {
69+
var testcases = []struct {
70+
name string
71+
eastAsianWidth bool
72+
wantSHA string
73+
}{
74+
{"ea-no", false, "4eb632b105d3b2c800dda9141381d0b8a95250a3a5c7f1a5ca2c4d4daaa85234"},
75+
{"ea-yes", true, "c2ddc3bdf42d81d4c23050e21eda46eb639b38b15322d35e8eb6c26f3b83ce92"},
76+
}
77+
78+
for _, testcase := range testcases {
79+
c := NewCondition()
80+
c.EastAsianWidth = testcase.eastAsianWidth
81+
buf := make([]byte, utf8.MaxRune+1)
82+
for r := rune(0); r <= utf8.MaxRune; r++ {
83+
buf[r] = byte(c.RuneWidth(r))
84+
}
85+
gotSHA := fmt.Sprintf("%x", sha256.Sum256(buf))
86+
if gotSHA != testcase.wantSHA {
87+
t.Errorf("TestRuneWidthChecksums = %s,\n\tsha256 = %s want %s",
88+
testcase.name, gotSHA, testcase.wantSHA)
89+
}
90+
}
91+
}
92+
6893
func checkInterval(first, last rune) bool {
6994
return first >= 0 && first <= utf8.MaxRune &&
7095
last >= 0 && last <= utf8.MaxRune &&
@@ -87,99 +112,71 @@ func isCompact(t *testing.T, ti *tableInfo) bool {
87112
return true
88113
}
89114

90-
// This is a utility function in case that a table has changed.
91-
func printCompactTable(tbl table) {
92-
counter := 0
93-
printEntry := func(first, last rune) {
94-
if counter%3 == 0 {
95-
fmt.Printf("\t")
96-
}
97-
fmt.Printf("{0x%04X, 0x%04X},", first, last)
98-
if (counter+1)%3 == 0 {
99-
fmt.Printf("\n")
100-
} else {
101-
fmt.Printf(" ")
102-
}
103-
counter++
104-
}
105-
106-
sort.Sort(&tbl) // just in case
107-
first := rune(-1)
108-
for i := range tbl {
109-
e := tbl[i]
110-
if !checkInterval(e.first, e.last) { // sanity check
111-
panic("invalid table")
112-
}
113-
if first < 0 {
114-
first = e.first
115-
}
116-
if i+1 < len(tbl) && e.last+1 >= tbl[i+1].first { // can be combined into one entry
117-
continue
118-
}
119-
printEntry(first, e.last)
120-
first = -1
121-
}
122-
fmt.Printf("\n\n")
123-
}
124-
125115
func TestSorted(t *testing.T) {
126116
for _, ti := range tables {
127117
if !sort.IsSorted(&ti.tbl) {
128118
t.Errorf("table not sorted: %s", ti.name)
129119
}
130120
if !isCompact(t, &ti) {
131121
t.Errorf("table not compact: %s", ti.name)
132-
//printCompactTable(ti.tbl)
133122
}
134123
}
135124
}
136125

137126
var runewidthtests = []struct {
138-
in rune
139-
out int
140-
eaout int
127+
in rune
128+
out int
129+
eaout int
130+
nseout int
141131
}{
142-
{'世', 2, 2},
143-
{'界', 2, 2},
144-
{'セ', 1, 1},
145-
{'カ', 1, 1},
146-
{'イ', 1, 1},
147-
{'☆', 1, 2}, // double width in ambiguous
148-
{'☺', 1, 1},
149-
{'☻', 1, 1},
150-
{'♥', 1, 2},
151-
{'♦', 1, 1},
152-
{'♣', 1, 2},
153-
{'♠', 1, 2},
154-
{'♂', 1, 2},
155-
{'♀', 1, 2},
156-
{'♪', 1, 2},
157-
{'♫', 1, 1},
158-
{'☼', 1, 1},
159-
{'↕', 1, 2},
160-
{'‼', 1, 1},
161-
{'↔', 1, 2},
162-
{'\x00', 0, 0},
163-
{'\x01', 0, 0},
164-
{'\u0300', 0, 0},
165-
{'\u2028', 0, 0},
166-
{'\u2029', 0, 0},
167-
{'a', 1, 1}, // ASCII classified as "na" (narrow)
168-
{'⟦', 1, 1}, // non-ASCII classified as "na" (narrow)
132+
{'世', 2, 2, 2},
133+
{'界', 2, 2, 2},
134+
{'セ', 1, 1, 1},
135+
{'カ', 1, 1, 1},
136+
{'イ', 1, 1, 1},
137+
{'☆', 1, 2, 2}, // double width in ambiguous
138+
{'☺', 1, 1, 2},
139+
{'☻', 1, 1, 2},
140+
{'♥', 1, 2, 2},
141+
{'♦', 1, 1, 2},
142+
{'♣', 1, 2, 2},
143+
{'♠', 1, 2, 2},
144+
{'♂', 1, 2, 2},
145+
{'♀', 1, 2, 2},
146+
{'♪', 1, 2, 2},
147+
{'♫', 1, 1, 2},
148+
{'☼', 1, 1, 2},
149+
{'↕', 1, 2, 2},
150+
{'‼', 1, 1, 2},
151+
{'↔', 1, 2, 2},
152+
{'\x00', 0, 0, 0},
153+
{'\x01', 0, 0, 0},
154+
{'\u0300', 0, 0, 0},
155+
{'\u2028', 0, 0, 0},
156+
{'\u2029', 0, 0, 0},
157+
{'a', 1, 1, 1}, // ASCII classified as "na" (narrow)
158+
{'⟦', 1, 1, 1}, // non-ASCII classified as "na" (narrow)
159+
{'👁', 1, 1, 2},
169160
}
170161

171162
func TestRuneWidth(t *testing.T) {
172163
c := NewCondition()
173164
c.EastAsianWidth = false
174165
for _, tt := range runewidthtests {
175166
if out := c.RuneWidth(tt.in); out != tt.out {
176-
t.Errorf("RuneWidth(%q) = %d, want %d", tt.in, out, tt.out)
167+
t.Errorf("RuneWidth(%q) = %d, want %d (EastAsianWidth=false)", tt.in, out, tt.out)
177168
}
178169
}
179170
c.EastAsianWidth = true
180171
for _, tt := range runewidthtests {
181172
if out := c.RuneWidth(tt.in); out != tt.eaout {
182-
t.Errorf("RuneWidth(%q) = %d, want %d", tt.in, out, tt.eaout)
173+
t.Errorf("RuneWidth(%q) = %d, want %d (EastAsianWidth=true)", tt.in, out, tt.eaout)
174+
}
175+
}
176+
c.StrictEmojiNeutral = false
177+
for _, tt := range runewidthtests {
178+
if out := c.RuneWidth(tt.in); out != tt.nseout {
179+
t.Errorf("RuneWidth(%q) = %d, want %d (StrictEmojiNeutral=false)", tt.in, out, tt.eaout)
183180
}
184181
}
185182
}

0 commit comments

Comments
 (0)