Skip to content

Commit fe13c4a

Browse files
committed
pretty: make the formatting code 99% faster
This patch memoizes the calls to the "best choice" function so that the
same choice is not computed twice for the same alternative during the
recursion.

Benchmark difference:

```
name           old time/op    new time/op    delta
PrettyData-16  1.79s ±31%     0.13s ± 3%     -92.50%  (p=0.008 n=5+5)

name           old alloc/op   new alloc/op   delta
PrettyData-16  630MB ± 1%     24MB ± 0%      -96.15%  (p=0.008 n=5+5)

name           old allocs/op  new allocs/op  delta
PrettyData-16  4.27M ± 0%     0.01M ± 0%     -99.74%  (p=0.008 n=5+5)
```

Release note: None
1 parent 35ef4e8 commit fe13c4a

File tree

2 files changed

+145
-112
lines changed

2 files changed

+145
-112
lines changed

pkg/util/pretty/document.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,6 @@ import (
4242
// referenced paper. This is the abstract representation constructed
4343
// by the pretty-printing code.
4444
type Doc interface {
45-
// All Docs can uniquely convert themselves into a string so they can be
46-
// memoized during better calculation.
47-
String() string
4845
isDoc()
4946
}
5047

@@ -55,13 +52,6 @@ func (concat) isDoc() {}
5552
func (nest) isDoc() {}
5653
func (union) isDoc() {}
5754

58-
func (d text) String() string { return fmt.Sprintf("(TEXT %q)", string(d)) }
59-
func (line) String() string { return "LINE" }
60-
func (nilDoc) String() string { return "NIL" }
61-
func (d concat) String() string { return fmt.Sprintf("(%s :<> %s)", d.a, d.b) }
62-
func (d nest) String() string { return fmt.Sprintf("(NEST %d %s)", d.n, d.d) }
63-
func (d union) String() string { return fmt.Sprintf("(%s :<|> %s)", d.x, d.y) }
64-
6555
//
6656
// Implementations of Doc ("DOC" in paper).
6757
//

pkg/util/pretty/pretty.go

Lines changed: 145 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
package pretty
1616

1717
import (
18-
"bytes"
1918
"fmt"
2019
"strings"
2120
)
@@ -28,32 +27,18 @@ import (
2827
// "Doc" in the referenced paper (not "DOC"). This is the
2928
// less-abstract representation constructed during "best layout"
3029
// selection.
31-
type docBest interface {
32-
String() string
33-
isDocBest()
30+
type docBest struct {
31+
tag docBestType
32+
s string
33+
d *docBest
3434
}
3535

36-
func (nilDocB) String() string { return "Nil" }
37-
func (d textB) String() string { return fmt.Sprintf("(%q `Text` %s)", d.s, d.d) }
38-
func (d lineB) String() string { return fmt.Sprintf("(%q `Line` %s)", d.s, d.d) }
36+
type docBestType int
3937

40-
func (nilDocB) isDocBest() {}
41-
func (textB) isDocBest() {}
42-
func (lineB) isDocBest() {}
43-
44-
type nilDocB struct{}
45-
46-
var nilB nilDocB
47-
48-
type textB struct {
49-
s string
50-
d docBest
51-
}
52-
53-
type lineB struct {
54-
s string
55-
d docBest
56-
}
38+
const (
39+
textB docBestType = iota
40+
lineB
41+
)
5742

5843
// Pretty returns a pretty-printed string for the Doc d at line length n.
5944
func Pretty(d Doc, n int) string {
@@ -64,133 +49,191 @@ func Pretty(d Doc, n int) string {
6449
}
6550

6651
// w is the max line width.
67-
func best(w int, x Doc) docBest {
52+
func best(w int, x Doc) *docBest {
6853
b := beExec{
69-
w: w,
70-
cache: make(map[cacheKey]docBest),
54+
w: w,
55+
memoBe: make(map[beArgs]*docBest),
56+
memoiDoc: make(map[iDoc]*iDoc),
7157
}
72-
return b.be(0, iDoc{0, "", x})
58+
return b.be(0, &iDoc{0, "", x, nil})
7359
}
7460

61+
// iDoc represents the type [(Int,DOC)] in the paper,
62+
// extended with arbitrary string prefixes (not just int).
63+
// We'll use linked lists because this makes the
64+
// recursion more efficient than slices.
7565
type iDoc struct {
76-
i int
77-
s string
78-
d Doc
79-
}
80-
81-
func (i iDoc) String() string {
82-
return fmt.Sprintf("{%d: %s}", i.i, i.d)
83-
}
84-
85-
type cacheKey struct {
86-
k int
87-
s string
66+
i int
67+
s string
68+
d Doc
69+
next *iDoc
8870
}
8971

9072
type beExec struct {
73+
// w is the available line width.
9174
w int
92-
// cache is a memoized cache used during better calculation.
93-
cache map[cacheKey]docBest
94-
buf bytes.Buffer
75+
76+
// memoBe internalizes the results of the be function, so that the
77+
// same value is not computed multiple times.
78+
memoBe map[beArgs]*docBest
79+
80+
// memo internalizes iDoc objects to ensure they are unique in memory,
81+
// and we can use pointer-pointer comparisons.
82+
memoiDoc map[iDoc]*iDoc
83+
84+
// docAlloc speeds up the allocations of be()'s return values
85+
// by (*beExec).newDocBest() defined below.
86+
docAlloc []docBest
87+
88+
// idocAlloc speeds up the allocations by (*beExec).iDoc() defined
89+
// below.
90+
idocAlloc []iDoc
9591
}
9692

97-
func (b beExec) be(k int, x ...iDoc) docBest {
98-
if len(x) == 0 {
99-
return nilB
93+
func (b *beExec) be(k int, xlist *iDoc) *docBest {
94+
// Shortcut: be k [] = Nil
95+
if xlist == nil {
96+
return nil
97+
}
98+
99+
// If we've computed this result before, short cut here too.
100+
memoKey := beArgs{k: k, d: xlist}
101+
if cached, ok := b.memoBe[memoKey]; ok {
102+
return cached
100103
}
101-
d := x[0]
102-
z := x[1:]
104+
105+
// General case.
106+
107+
d := *xlist
108+
z := xlist.next
109+
110+
// Note: we'll need to memoize the result below.
111+
var res *docBest
112+
103113
switch t := d.d.(type) {
104114
case nilDoc:
105-
return b.be(k, z...)
115+
res = b.be(k, z)
106116
case concat:
107-
return b.be(k, append([]iDoc{{d.i, d.s, t.a}, {d.i, d.s, t.b}}, z...)...)
117+
res = b.be(k, b.iDoc(d.i, d.s, t.a, b.iDoc(d.i, d.s, t.b, z)))
108118
case nest:
109-
x[0] = iDoc{
110-
d: t.d,
111-
s: d.s + t.s,
112-
i: d.i + t.n,
113-
}
114-
return b.be(k, x...)
119+
res = b.be(k, b.iDoc(d.i+t.n, d.s+t.s, t.d, z))
115120
case text:
116-
return textB{
117-
s: string(t),
118-
d: b.be(k+len(t), z...),
119-
}
121+
res = b.newDocBest(docBest{
122+
tag: textB,
123+
s: string(t),
124+
d: b.be(k+len(t), z),
125+
})
120126
case line:
121-
return lineB{
122-
s: d.s,
123-
d: b.be(d.i, z...),
124-
}
127+
res = b.newDocBest(docBest{
128+
tag: lineB,
129+
s: d.s,
130+
d: b.be(d.i, z),
131+
})
125132
case union:
126-
// Use a memoized version of the Doc and check if it's been through this
127-
// function before. There may be a faster implementation that converts this
128-
// function to an iterative style, but this current implementation is almost
129-
// identical to the paper (as this in done automatically in Haskell) and is
130-
// fast enough.
131-
for _, xd := range x {
132-
b.buf.WriteString(xd.String())
133-
}
134-
key := cacheKey{
135-
k: k,
136-
s: b.buf.String(),
137-
}
138-
b.buf.Reset()
139-
cached, ok := b.cache[key]
140-
if ok {
141-
return cached
142-
}
143-
144-
n := append([]iDoc{{d.i, d.s, t.x}}, z...)
145-
res := better(b.w, k,
146-
b.be(k, n...),
147-
func() docBest {
148-
n[0].d = t.y
149-
return b.be(k, n...)
133+
res = better(b.w, k,
134+
b.be(k, b.iDoc(d.i, d.s, t.x, z)),
135+
// We eta-lift the second argument to avoid eager evaluation.
136+
func() *docBest {
137+
return b.be(k, b.iDoc(d.i, d.s, t.y, z))
150138
},
151139
)
152-
b.cache[key] = res
153-
return res
154140
default:
155141
panic(fmt.Errorf("unknown type: %T", d.d))
156142
}
143+
144+
// Memoize so we don't compute the same result twice.
145+
b.memoBe[memoKey] = res
146+
147+
return res
148+
}
149+
150+
// newDocBest makes a new docBest on the heap. Allocations
151+
// are batched for more efficiency.
152+
func (b *beExec) newDocBest(d docBest) *docBest {
153+
buf := &b.docAlloc
154+
if len(*buf) == 0 {
155+
*buf = make([]docBest, 100)
156+
}
157+
r := &(*buf)[0]
158+
*r = d
159+
*buf = (*buf)[1:]
160+
return r
157161
}
158162

159-
func better(w, k int, x docBest, y func() docBest) docBest {
163+
// iDoc retrieves the unique instance of iDoc in memory for the given
164+
// values of i, s, d and z. The object is constructed if it does not
165+
// exist yet.
166+
//
167+
// The results of this function guarantee that the pointer addresses
168+
// are equal if the arguments used to construct the value were equal.
169+
func (b *beExec) iDoc(i int, s string, d Doc, z *iDoc) *iDoc {
170+
idoc := iDoc{i, s, d, z}
171+
if m, ok := b.memoiDoc[idoc]; ok {
172+
return m
173+
}
174+
r := b.newiDoc(idoc)
175+
b.memoiDoc[idoc] = r
176+
return r
177+
}
178+
179+
// newiDoc makes a new iDoc on the heap. Allocations are batched
180+
// for more efficiency. Do not use this directly! Instead
181+
// use the iDoc() method defined above.
182+
func (b *beExec) newiDoc(d iDoc) *iDoc {
183+
buf := &b.idocAlloc
184+
if len(*buf) == 0 {
185+
*buf = make([]iDoc, 100)
186+
}
187+
r := &(*buf)[0]
188+
*r = d
189+
*buf = (*buf)[1:]
190+
return r
191+
}
192+
193+
type beArgs struct {
194+
k int
195+
d *iDoc
196+
}
197+
198+
func better(w, k int, x *docBest, y func() *docBest) *docBest {
160199
if fits(w-k, x) {
161200
return x
162201
}
163202
return y()
164203
}
165204

166-
func fits(w int, x docBest) bool {
205+
func fits(w int, x *docBest) bool {
167206
if w < 0 {
168207
return false
169208
}
170-
switch t := x.(type) {
171-
case nilDocB:
209+
if x == nil {
210+
// Nil doc.
172211
return true
212+
}
213+
switch x.tag {
173214
case textB:
174-
return fits(w-len(t.s), t.d)
215+
return fits(w-len(x.s), x.d)
175216
case lineB:
176217
return true
177218
default:
178-
panic(fmt.Errorf("unknown type: %T", x))
219+
panic(fmt.Errorf("unknown type: %d", x.tag))
179220
}
180221
}
181222

182-
func layout(sb *strings.Builder, d docBest) {
183-
switch d := d.(type) {
184-
case nilDocB:
185-
// ignore
223+
func layout(sb *strings.Builder, d *docBest) {
224+
if d == nil {
225+
// Nil doc: no output.
226+
return
227+
}
228+
switch d.tag {
186229
case textB:
187230
sb.WriteString(d.s)
188231
layout(sb, d.d)
189232
case lineB:
190-
sb.WriteString("\n")
233+
sb.WriteByte('\n')
191234
sb.WriteString(d.s)
192235
layout(sb, d.d)
193236
default:
194-
panic(fmt.Errorf("unknown type: %T", d))
237+
panic(fmt.Errorf("unknown type: %d", d.tag))
195238
}
196239
}

0 commit comments

Comments
 (0)