Skip to content

Commit

Permalink
Make PeekingLexer a concrete impl based on rewinder.
Browse files Browse the repository at this point in the history
This will allow passing around a single lexer through all layers,
including sub-lexers.
  • Loading branch information
alecthomas committed Dec 13, 2019
1 parent 90e2176 commit 074c00f
Show file tree
Hide file tree
Showing 14 changed files with 117 additions and 120 deletions.
8 changes: 4 additions & 4 deletions _examples/precedenceclimbing/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ func (e *Expr) String() string {
return fmt.Sprintf("%d", *e.Terminal)
}

func (e *Expr) Parse(lex lexer.PeekingLexer) error {
// Parse implements the participle Parseable interface. It parses a complete
// expression from lex (starting at minimum precedence 0) and copies the
// result into e.
func (e *Expr) Parse(lex *lexer.PeekingLexer) error {
	*e = *parseExpr(lex, 0)
	return nil
}

// (1 + 2) * 3
func parseExpr(lex lexer.PeekingLexer, minPrec int) *Expr {
func parseExpr(lex *lexer.PeekingLexer, minPrec int) *Expr {
lhs := next(lex)
for {
op := peek(lex)
Expand All @@ -79,7 +79,7 @@ func parseOp(op *Expr, lhs *Expr, rhs *Expr) *Expr {
return op
}

func next(lex lexer.PeekingLexer) *Expr {
func next(lex *lexer.PeekingLexer) *Expr {
e := peek(lex)
if e == nil {
return e
Expand All @@ -92,7 +92,7 @@ func next(lex lexer.PeekingLexer) *Expr {
return e
}

func peek(lex lexer.PeekingLexer) *Expr {
func peek(lex *lexer.PeekingLexer) *Expr {
t, err := lex.Peek(0)
if err != nil {
panic(err)
Expand Down
2 changes: 1 addition & 1 deletion _examples/protobuf/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ var stringToScalar = map[string]Scalar{
"sfixed64": SFixed64, "bool": Bool, "string": String, "bytes": Bytes,
}

func (s *Scalar) Parse(lex lexer.PeekingLexer) error {
func (s *Scalar) Parse(lex *lexer.PeekingLexer) error {
token, err := lex.Peek(0)
if err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion api.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ type Parseable interface {
//
// Should return NextMatch if no tokens matched and parsing should continue.
// Nil should be returned if parsing was successful.
Parse(lex lexer.PeekingLexer) error
Parse(lex *lexer.PeekingLexer) error
}
68 changes: 6 additions & 62 deletions context.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,19 @@ type contextFieldSet struct {

// Context for a single parse.
type parseContext struct {
*rewinder
*lexer.PeekingLexer
lookahead int
caseInsensitive map[rune]bool
apply []*contextFieldSet
}

func newParseContext(lex lexer.Lexer, lookahead int, caseInsensitive map[rune]bool) (*parseContext, error) {
rew, err := newRewinder(lex)
peeker, err := lexer.Upgrade(lex)
if err != nil {
return nil, err
}
return &parseContext{
rewinder: rew,
PeekingLexer: peeker,
caseInsensitive: caseInsensitive,
lookahead: lookahead,
}, nil
Expand All @@ -52,79 +52,23 @@ func (p *parseContext) Apply() error {
// Accept the branch as the correct branch.
func (p *parseContext) Accept(branch *parseContext) {
p.apply = append(p.apply, branch.apply...)
p.rewinder = branch.rewinder
p.PeekingLexer = branch.PeekingLexer
}

// Branch starts a new lookahead branch.
func (p *parseContext) Branch() *parseContext {
branch := &parseContext{}
*branch = *p
branch.apply = nil
branch.rewinder = p.rewinder.Lookahead()
branch.PeekingLexer = p.PeekingLexer.Clone()
return branch
}

// Stop returns true if parsing should terminate after the given "branch" failed to match.
func (p *parseContext) Stop(branch *parseContext) bool {
if branch.cursor > p.cursor+p.lookahead {
if branch.PeekingLexer.Cursor() > p.PeekingLexer.Cursor()+p.lookahead {
p.Accept(branch)
return true
}
return false
}

// rewinder buffers every token from a Lexer up front and replays them
// through a cursor, allowing lookahead branches to rewind.
type rewinder struct {
	cursor, limit int           // cursor: next token to return; limit: rewind floor for lookahead branches
	tokens        []lexer.Token // all tokens, lexed eagerly (EOF excluded)
}

// newRewinder eagerly drains lex of all tokens (stopping at, and excluding,
// EOF) and returns a rewinder positioned at the first token. Any lexing
// error aborts construction.
func newRewinder(lex lexer.Lexer) (*rewinder, error) {
	r := &rewinder{}
	for {
		token, err := lex.Next()
		if err != nil {
			return nil, err
		}
		if token.EOF() {
			return r, nil
		}
		r.tokens = append(r.tokens, token)
	}
}

// Next consumes and returns the next buffered token. Once the buffer is
// exhausted it returns a synthetic EOF token; the error is always nil.
func (r *rewinder) Next() (lexer.Token, error) {
	if r.cursor < len(r.tokens) {
		token := r.tokens[r.cursor]
		r.cursor++
		return token, nil
	}
	return r.eofToken(), nil
}

// Peek returns the token n positions ahead of the cursor without consuming
// anything. Peeking past the end of the buffer yields a synthetic EOF token;
// the error is always nil.
func (r *rewinder) Peek(n int) (lexer.Token, error) {
	if idx := r.cursor + n; idx < len(r.tokens) {
		return r.tokens[idx], nil
	}
	return r.eofToken(), nil
}

// eofToken synthesises an EOF token placed at the position of the last
// buffered token, or at the zero Position when nothing was lexed at all.
func (r *rewinder) eofToken() lexer.Token {
	var pos lexer.Position
	if n := len(r.tokens); n > 0 {
		pos = r.tokens[n-1].Pos
	}
	return lexer.EOFToken(pos)
}

// Lookahead returns an independent copy of this rewinder for speculative
// parsing. The copy's rewind limit is pinned at the current cursor.
func (r *rewinder) Lookahead() *rewinder {
	branch := *r
	branch.limit = branch.cursor
	return &branch
}

// Keep commits this lookahead rewinder by clearing its rewind limit.
func (r *rewinder) Keep() {
	r.limit = 0
}
9 changes: 6 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
module github.com/alecthomas/participle

require (
github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2
github.com/alecthomas/kong v0.2.1
github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/stretchr/testify v1.2.2
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect
github.com/stretchr/testify v1.4.0
gopkg.in/alecthomas/kingpin.v2 v2.2.6
)

go 1.13
20 changes: 20 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2 h1:gKv6LPDhF/G3cNribA+kZtNPiPpKabZGLhcJuEtp3ig=
github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2/go.mod h1:CxCgO+NdpMdi9SsTlGbc0W+/UNxO3I0AabOEJZ3w61w=
github.com/alecthomas/kong v0.2.1 h1:V1tLBhyQBC4rsbXbcOvm3GBaytJSwRNX69fp1WJxbqQ=
github.com/alecthomas/kong v0.2.1/go.mod h1:+inYUSluD+p4L8KdviBSgzcqEjUQOfC5fQDRFuc36lI=
github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1 h1:GDQdwm/gAcJcLAKQQZGOJ4knlw+7rfEQQcmwTbt4p5E=
github.com/alecthomas/repr v0.0.0-20181024024818-d37bc2a10ba1/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
14 changes: 5 additions & 9 deletions lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,6 @@ type Lexer interface {
Next() (Token, error)
}

// A PeekingLexer returns tokens from a source and allows peeking.
type PeekingLexer interface {
Lexer
// Peek at the next token.
Peek(n int) (Token, error)
}

// SymbolsByRune returns a map of lexer symbol names keyed by rune.
func SymbolsByRune(def Definition) map[rune]string {
out := map[rune]string{}
Expand Down Expand Up @@ -99,7 +92,7 @@ func (p Position) GoString() string {
func (p Position) String() string {
filename := p.Filename
if filename == "" {
filename = "<source>"
return fmt.Sprintf("%d:%d", p.Line, p.Column)
}
return fmt.Sprintf("%s:%d:%d", filename, p.Line, p.Column)
}
Expand Down Expand Up @@ -130,7 +123,10 @@ func (t Token) String() string {
}

func (t Token) GoString() string {
return fmt.Sprintf("Token{%d, %q}", t.Type, t.Value)
if t.Pos == (Position{}) {
return fmt.Sprintf("Token{%d, %q}", t.Type, t.Value)
}
return fmt.Sprintf("Token@%s{%d, %q}", t.Pos.String(), t.Type, t.Value)
}

// MakeSymbolTable builds a lookup table for checking token ID existence.
Expand Down
67 changes: 43 additions & 24 deletions lexer/peek.go
Original file line number Diff line number Diff line change
@@ -1,37 +1,56 @@
package lexer

// Upgrade a Lexer to a PeekingLexer with arbitrary lookahead.
func Upgrade(lexer Lexer) PeekingLexer {
if peeking, ok := lexer.(PeekingLexer); ok {
return peeking
}
return &lookaheadLexer{Lexer: lexer}
}

type lookaheadLexer struct {
Lexer
peeked []Token
// PeekingLexer supports arbitrary lookahead as well as cloning.
//
// It is a concrete type (rather than an interface) so a single instance can
// be passed through all parsing layers and cheaply cloned for branches.
type PeekingLexer struct {
	cursor int     // index of the next unconsumed token
	eof    Token   // the EOF token captured when the source lexer was drained
	tokens []Token // all tokens, lexed eagerly at construction (EOF excluded)
}

func (l *lookaheadLexer) Peek(n int) (Token, error) {
for len(l.peeked) <= n {
t, err := l.Lexer.Next()
// Upgrade a Lexer to a PeekingLexer with arbitrary lookahead.
func Upgrade(lex Lexer) (*PeekingLexer, error) {
r := &PeekingLexer{}
for {
t, err := lex.Next()
if err != nil {
return Token{}, err
return nil, err
}
if t.EOF() {
return t, nil
r.eof = t
break
}
l.peeked = append(l.peeked, t)
r.tokens = append(r.tokens, t)
}
return r, nil
}

// Cursor returns the current position in the token stream, i.e. the index
// of the next token that Next will return.
func (p *PeekingLexer) Cursor() int {
	return p.cursor
}

// Next consumes and returns the next token.
func (p *PeekingLexer) Next() (Token, error) {
if p.cursor >= len(p.tokens) {
return p.eof, nil
}
return l.peeked[n], nil
p.cursor++
return p.tokens[p.cursor-1], nil
}

func (l *lookaheadLexer) Next() (Token, error) {
if len(l.peeked) > 0 {
t := l.peeked[0]
l.peeked = l.peeked[1:]
return t, nil
// Peek ahead at the (n+1)th token, i.e. Peek(0) peeks at the next token.
func (p *PeekingLexer) Peek(n int) (Token, error) {
i := p.cursor + n
if i >= len(p.tokens) {
return p.eof, nil
}
return l.Lexer.Next()
return p.tokens[i], nil
}

// Clone creates a clone of this PeekingLexer at its current token.
//
// The parent and clone are completely independent: advancing one does not
// affect the other (the underlying token slice is shared but never mutated).
func (p *PeekingLexer) Clone() *PeekingLexer {
	out := *p
	return &out
}
5 changes: 3 additions & 2 deletions lexer/peek_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ func (s *staticLexer) Next() (Token, error) {
func TestUpgrade(t *testing.T) {
t0 := Token{Type: 1, Value: "moo"}
t1 := Token{Type: 2, Value: "blah"}
l := Upgrade(&staticLexer{tokens: []Token{t0, t1}})
l, err := Upgrade(&staticLexer{tokens: []Token{t0, t1}})
require.NoError(t, err)
require.Equal(t, t0, mustPeek(t, l, 0))
require.Equal(t, t0, mustPeek(t, l, 0))
require.Equal(t, t1, mustPeek(t, l, 1))
Expand All @@ -31,7 +32,7 @@ func TestUpgrade(t *testing.T) {
require.True(t, mustPeek(t, l, 3).EOF())
}

func mustPeek(t *testing.T, lexer PeekingLexer, n int) Token {
func mustPeek(t *testing.T, lexer *PeekingLexer, n int) Token {
token, err := lexer.Peek(n)
require.NoError(t, err)
return token
Expand Down
3 changes: 2 additions & 1 deletion lexer/text_scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import (
)

func TestLexer(t *testing.T) {
lexer := Upgrade(LexString("hello world"))
lexer, err := Upgrade(LexString("hello world"))
require.NoError(t, err)
helloPos := Position{Offset: 0, Line: 1, Column: 1}
worldPos := Position{Offset: 6, Line: 1, Column: 7}
eofPos := Position{Offset: 11, Line: 1, Column: 12}
Expand Down
6 changes: 3 additions & 3 deletions nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func (p *parseable) String() string { return stringer(p) }
func (p *parseable) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
rv := reflect.New(p.t)
v := rv.Interface().(Parseable)
err = v.Parse(ctx)
err = v.Parse(ctx.PeekingLexer)
if err != nil {
if err == NextMatch {
return nil, nil
Expand Down Expand Up @@ -201,10 +201,10 @@ func (d *disjunction) Parse(ctx *parseContext, parent reflect.Value) (out []refl
}
// Show the closest error returned. The idea here is that the further the parser progresses
// without error, the more difficult it is to trace the error back to its root.
if branch.cursor >= deepestError {
if branch.Cursor() >= deepestError {
firstError = err
firstValues = value
deepestError = branch.cursor
deepestError = branch.Cursor()
}
} else if value != nil {
ctx.Accept(branch)
Expand Down
Loading

0 comments on commit 074c00f

Please sign in to comment.