Skip to content

Commit 35ef4e8

Browse files
committed
pretty: bring the code closer to Wadler's code
The Go code was mixing up the two types Doc and DOC from Philip Wadler's prettier printer. This patch splits them, which opens an optimization avenue. Release note: None
1 parent 8252265 commit 35ef4e8

File tree

3 files changed

+117
-98
lines changed

3 files changed

+117
-98
lines changed

pkg/util/pretty/document.go

Lines changed: 49 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,19 @@
2828
// side. The paper then describes various performance improvements that reduce
2929
// the search space of the best function such that it can complete in O(n)
3030
// instead of O(n^2) time, where n is the number of nodes.
31+
//
32+
// For example code with SQL to experiment further, refer to
33+
// https://github.com/knz/prettier/
34+
//
3135
package pretty
3236

3337
import (
3438
"fmt"
3539
)
3640

37-
// Doc represents a document as described by the referenced paper.
41+
// Doc represents a document as described by the type "DOC" in the
42+
// referenced paper. This is the abstract representation constructed
43+
// by the pretty-printing code.
3844
type Doc interface {
3945
// All Docs can uniquely convert themselves into a string so they can be
4046
// memoized during better calculation.
@@ -48,81 +54,77 @@ func (nilDoc) isDoc() {}
4854
func (concat) isDoc() {}
4955
func (nest) isDoc() {}
5056
func (union) isDoc() {}
51-
func (textX) isDoc() {}
52-
func (lineX) isDoc() {}
5357

54-
func (d text) String() string { return fmt.Sprintf("(%q)", string(d)) }
58+
func (d text) String() string { return fmt.Sprintf("(TEXT %q)", string(d)) }
5559
func (line) String() string { return "LINE" }
5660
func (nilDoc) String() string { return "NIL" }
5761
func (d concat) String() string { return fmt.Sprintf("(%s :<> %s)", d.a, d.b) }
5862
func (d nest) String() string { return fmt.Sprintf("(NEST %d %s)", d.n, d.d) }
5963
func (d union) String() string { return fmt.Sprintf("(%s :<|> %s)", d.x, d.y) }
60-
func (d textX) String() string { return fmt.Sprintf("(%q TEXTX %s)", d.s, d.d) }
61-
func (d lineX) String() string { return fmt.Sprintf("(%q LINEX %s)", d.s, d.d) }
6264

65+
//
66+
// Implementations of Doc ("DOC" in paper).
67+
//
68+
69+
// nilDoc represents NIL :: DOC -- the empty doc.
6370
type nilDoc struct{}
6471

65-
// Nil is the empty Doc.
72+
// Nil is the NIL constructor.
6673
var Nil nilDoc
6774

75+
// text represents (TEXT s) :: DOC -- a simple text string.
6876
type text string
6977

70-
// Text creates a string Doc.
78+
// Text is the TEXT constructor.
7179
func Text(s string) Doc {
7280
return text(s)
7381
}
7482

83+
// line represents LINE :: DOC -- a "soft line" that can be flattened to a space.
7584
type line struct{}
7685

77-
// Line is either a newline or a space if flattened onto a single line.
86+
// Line is the LINE constructor.
7887
var Line line
7988

89+
// concat represents (DOC <> DOC) :: DOC -- the concatenation of two docs.
8090
type concat struct {
8191
a, b Doc
8292
}
8393

84-
// fnNotNil runs returns fn(a, b). nil (the Go value) is converted to Nil (the
85-
// Doc). If either Doc is Nil, the other Doc is returned without invoking fn.
86-
func concatFn(a, b Doc, fn func(Doc, Doc) Doc) Doc {
87-
if a == nil {
88-
a = Nil
89-
}
90-
if b == nil {
91-
b = Nil
92-
}
93-
if a == Nil {
94-
return b
95-
}
96-
if b == Nil {
97-
return a
98-
}
99-
return fn(a, b)
94+
// Concat is the <> constructor.
95+
// This uses simplifyNil to avoid actually inserting NIL docs
96+
// in the abstract tree.
97+
func Concat(a, b Doc) Doc {
98+
return simplifyNil(a, b, func(a, b Doc) Doc { return concat{a, b} })
10099
}
101100

102-
// Concat concatenates two Docs.
103-
func Concat(a, b Doc) Doc {
104-
return concatFn(a, b, func(a, b Doc) Doc {
105-
return concat{a, b}
106-
})
101+
// nest represents (NEST Int DOC) :: DOC -- nesting a doc "under" another.
102+
// NEST indents d by s at effective length n. len(s) does not have to be
103+
// n. This allows s to be a tab character and n can be a tab width.
104+
type nest struct {
105+
n int
106+
s string
107+
d Doc
107108
}
108109

109-
// Fill concatenates a list of docs with spaces if possible,
110-
// otherwise with newlines.
111-
// See linked paper pp 14-15.
112-
// func Fill(d ...Doc) Doc {
113-
// switch len(d) {
114-
// case 0:
115-
// return Nil
116-
// case 1:
117-
// return d[0]
118-
// default:
119-
// rest := Fill(d[1:]...)
120-
// return union{
121-
// ConcatSpace(flatten(d[0]), rest),
122-
// ConcatLine(d[0], rest),
123-
// }
124-
// }
125-
// }
110+
// Nest is the NEST constructor.
111+
func Nest(n int, s string, d Doc) Doc {
112+
return nest{n, s, d}
113+
}
114+
115+
// union represents (DOC <|> DOC) :: DOC -- the union of two docs.
116+
// <|> is really the union of two sets of layouts. x and y must flatten to the
117+
// same layout. Additionally, no first line of a document in x is shorter
118+
// than some first line of a document in y; or, equivalently, every first
119+
// line in x is at least as long as every first line in y.
120+
//
121+
// The main use of the union is via the Group operator defined below.
122+
//
123+
// We do not provide a public constructor as this type is not
124+
// exported.
125+
type union struct {
126+
x, y Doc
127+
}
126128

127129
// Group will format d on one line if possible.
128130
func Group(d Doc) Doc {
@@ -149,35 +151,3 @@ func flatten(d Doc) Doc {
149151
panic(fmt.Errorf("unknown type: %T", d))
150152
}
151153
}
152-
153-
type nest struct {
154-
n int
155-
s string
156-
d Doc
157-
}
158-
159-
// Nest indents d by s at effective length n. len(s) does not have to be
160-
// n. This allows s to be a tab character and n can be a tab width.
161-
func Nest(n int, s string, d Doc) Doc {
162-
return nest{n, s, d}
163-
}
164-
165-
// Doc types below are not directly accessible to users.
166-
167-
// union is the union of two sets of layouts. x and y must flatten to the
168-
// same layout. Additionally, no first line of a document in x is shorter
169-
// than some first line of a document in y; or, equivalently, every first
170-
// line in x is at least as long as every first line in y.
171-
type union struct {
172-
x, y Doc
173-
}
174-
175-
type textX struct {
176-
s string
177-
d Doc
178-
}
179-
180-
type lineX struct {
181-
s string
182-
d Doc
183-
}

pkg/util/pretty/pretty.go

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,37 @@ import (
2424
// of the below code. Methods, variables, and implementation details were
2525
// made to resemble it as closely as possible.
2626

27+
// docBest represents a selected document as described by the type
28+
// "Doc" in the referenced paper (not "DOC"). This is the
29+
// less-abstract representation constructed during "best layout"
30+
// selection.
31+
type docBest interface {
32+
String() string
33+
isDocBest()
34+
}
35+
36+
func (nilDocB) String() string { return "Nil" }
37+
func (d textB) String() string { return fmt.Sprintf("(%q `Text` %s)", d.s, d.d) }
38+
func (d lineB) String() string { return fmt.Sprintf("(%q `Line` %s)", d.s, d.d) }
39+
40+
func (nilDocB) isDocBest() {}
41+
func (textB) isDocBest() {}
42+
func (lineB) isDocBest() {}
43+
44+
type nilDocB struct{}
45+
46+
var nilB nilDocB
47+
48+
type textB struct {
49+
s string
50+
d docBest
51+
}
52+
53+
type lineB struct {
54+
s string
55+
d docBest
56+
}
57+
2758
// Pretty returns a pretty-printed string for the Doc d at line length n.
2859
func Pretty(d Doc, n int) string {
2960
var sb strings.Builder
@@ -33,10 +64,10 @@ func Pretty(d Doc, n int) string {
3364
}
3465

3566
// w is the max line width.
36-
func best(w int, x Doc) Doc {
67+
func best(w int, x Doc) docBest {
3768
b := beExec{
3869
w: w,
39-
cache: make(map[cacheKey]Doc),
70+
cache: make(map[cacheKey]docBest),
4071
}
4172
return b.be(0, iDoc{0, "", x})
4273
}
@@ -59,13 +90,13 @@ type cacheKey struct {
5990
type beExec struct {
6091
w int
6192
// cache is a memoized cache used during better calculation.
62-
cache map[cacheKey]Doc
93+
cache map[cacheKey]docBest
6394
buf bytes.Buffer
6495
}
6596

66-
func (b beExec) be(k int, x ...iDoc) Doc {
97+
func (b beExec) be(k int, x ...iDoc) docBest {
6798
if len(x) == 0 {
68-
return Nil
99+
return nilB
69100
}
70101
d := x[0]
71102
z := x[1:]
@@ -82,12 +113,12 @@ func (b beExec) be(k int, x ...iDoc) Doc {
82113
}
83114
return b.be(k, x...)
84115
case text:
85-
return textX{
116+
return textB{
86117
s: string(t),
87118
d: b.be(k+len(t), z...),
88119
}
89120
case line:
90-
return lineX{
121+
return lineB{
91122
s: d.s,
92123
d: b.be(d.i, z...),
93124
}
@@ -113,7 +144,7 @@ func (b beExec) be(k int, x ...iDoc) Doc {
113144
n := append([]iDoc{{d.i, d.s, t.x}}, z...)
114145
res := better(b.w, k,
115146
b.be(k, n...),
116-
func() Doc {
147+
func() docBest {
117148
n[0].d = t.y
118149
return b.be(k, n...)
119150
},
@@ -125,37 +156,37 @@ func (b beExec) be(k int, x ...iDoc) Doc {
125156
}
126157
}
127158

128-
func better(w, k int, x Doc, y func() Doc) Doc {
159+
func better(w, k int, x docBest, y func() docBest) docBest {
129160
if fits(w-k, x) {
130161
return x
131162
}
132163
return y()
133164
}
134165

135-
func fits(w int, x Doc) bool {
166+
func fits(w int, x docBest) bool {
136167
if w < 0 {
137168
return false
138169
}
139170
switch t := x.(type) {
140-
case nilDoc:
171+
case nilDocB:
141172
return true
142-
case textX:
173+
case textB:
143174
return fits(w-len(t.s), t.d)
144-
case lineX:
175+
case lineB:
145176
return true
146177
default:
147178
panic(fmt.Errorf("unknown type: %T", x))
148179
}
149180
}
150181

151-
func layout(sb *strings.Builder, d Doc) {
182+
func layout(sb *strings.Builder, d docBest) {
152183
switch d := d.(type) {
153-
case nilDoc:
184+
case nilDocB:
154185
// ignore
155-
case textX:
186+
case textB:
156187
sb.WriteString(d.s)
157188
layout(sb, d.d)
158-
case lineX:
189+
case lineB:
159190
sb.WriteString("\n")
160191
sb.WriteString(d.s)
161192
layout(sb, d.d)

pkg/util/pretty/util.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ func JoinNestedRight(n int, s string, sep Doc, nested ...Doc) Doc {
6262

6363
// ConcatLine concatenates two Docs with a Line.
6464
func ConcatLine(a, b Doc) Doc {
65-
return concatFn(a, b, func(a, b Doc) Doc {
65+
return simplifyNil(a, b, func(a, b Doc) Doc {
6666
return Concat(
6767
a,
6868
Concat(
@@ -75,7 +75,7 @@ func ConcatLine(a, b Doc) Doc {
7575

7676
// ConcatSpace concatenates two Docs with a space.
7777
func ConcatSpace(a, b Doc) Doc {
78-
return concatFn(a, b, func(a, b Doc) Doc {
78+
return simplifyNil(a, b, func(a, b Doc) Doc {
7979
return Concat(
8080
a,
8181
Concat(
@@ -146,3 +146,21 @@ func Bracket(n int, s string, l string, x Doc, r string) Doc {
146146
)
147147
return union{flatten(a), b}
148148
}
149+
150+
// simplifyNil returns fn(a, b). nil (the Go value) is converted to Nil (the
151+
// Doc). If either Doc is Nil, the other Doc is returned without invoking fn.
152+
func simplifyNil(a, b Doc, fn func(Doc, Doc) Doc) Doc {
153+
if a == nil {
154+
a = Nil
155+
}
156+
if b == nil {
157+
b = Nil
158+
}
159+
if a == Nil {
160+
return b
161+
}
162+
if b == Nil {
163+
return a
164+
}
165+
return fn(a, b)
166+
}

0 commit comments

Comments
 (0)