Skip to content

Commit

Permalink
Bug fixes, improvements, optimization & refactoring before parser gen…
Browse files Browse the repository at this point in the history
…eration (#288)

* Fix bad float conformation error message + add coverage

* Remove duplicit information from int/float conversion errors

Both strconv.ParseInt and strconv.ParseFloat already report the input.
The word invalid isn't always true - it could also just be out of range.
The type is contained within the Parse* function being called. So as the
extra context added nothing useful, I decided to remove it altogether,
making things much easier for the generated parser.

* Fix possible panic when optional groups are used within a capture

If an optional group within a capture matches no input (either because
of an unexpected token or EOF), obtaining partial AST can cause
`setField` to be called with no tokens, causing a panic. Fixed by not
doing anything.

* Improve mismatch error message for negation & negative lookahead group

It didn't properly escape the token, the error would be malformed if the
token contained single quotes. It also used a custom error instead of
`UnexpectedTokenError` for no good reason.

* Improve the choice of error reporting after optional branches

The last error in the added test explains the difference. Without this
change, it would have been `1:15: unexpected token "(" (expected ";")`,
which would be very confusing as '(' was actually allowed after the
class name.

After this change, it's `1:20: unexpected token ")" (expected <ident>)`,
pointing out the real problem - that an identifier was expected after
the comma.

* Optimize string field accumulation to do fewer allocations

* Optimize finding case insensitive tokens to be done at parser construction

Optimize finding case insensitive tokens to be done at parser construction

Doing it before each parsing was unnecessary. One could worry the
results would be different if `ParseFromLexer` was called with a
different lexer than used to build the grammar, but then case-insentive
tokens would be the least of our problems - literal & reference would
check agaist the wrong TokenType.

* Avoid an allocation in newParseContext

Not very significant for the reflective parser, more significant for the
generated one.

* Include explicit disjunction in the union node

The generated parser would be the second place that would construct a
temporary disjunction in the implementation, I think it might as well
contain the disjunction directly.

* Remove unused optional and repetition grammar nodes

They could never be created and were private, so this certainly can't
break anything.

* Change the receiver name of lookaheadGroup from n to l

It was n probably because I copied it from negation and assumed n was
for node. Naming it l is more consistent with the convention for other
nodes.

* Keep track of the number of usages of a strct node

* Allow specifying UnexpectedTokenError expected sequence as a string

Co-authored-by: Peter Dolak <peter.dolak@exponea.com>
Co-authored-by: Peter Dolak <peter.dolak@bloomreach.com>
  • Loading branch information
3 people authored Dec 5, 2022
1 parent 5adbb7c commit 1f05da7
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 140 deletions.
15 changes: 9 additions & 6 deletions context.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ type parseContext struct {
allowTrailing bool
}

func newParseContext(lex *lexer.PeekingLexer, lookahead int, caseInsensitive map[lexer.TokenType]bool) *parseContext {
return &parseContext{
func newParseContext(lex *lexer.PeekingLexer, lookahead int, caseInsensitive map[lexer.TokenType]bool) parseContext {
return parseContext{
PeekingLexer: *lex,
caseInsensitive: caseInsensitive,
lookahead: lookahead,
Expand Down Expand Up @@ -90,11 +90,14 @@ func (p *parseContext) MaybeUpdateError(err error) {

// Stop returns true if parsing should terminate after the given "branch" failed to match.
//
// Additionally, "err" should be the branch error, if any. This will be tracked to
// aid in error reporting under the assumption that the deepest occurring error is more
// useful than errors further up.
// Additionally, track the deepest error in the branch - the deeper the error, the more useful it usually is.
// It could already be the deepest error in the branch (only if deeper than current parent context deepest),
// or it could be "err", the latest error on the branch (even if same depth; the lexer holds the position).
func (p *parseContext) Stop(err error, branch *parseContext) bool {
if branch.PeekingLexer.Cursor() >= p.deepestErrorDepth {
if branch.deepestErrorDepth > p.deepestErrorDepth {
p.deepestError = branch.deepestError
p.deepestErrorDepth = branch.deepestErrorDepth
} else if branch.PeekingLexer.Cursor() >= p.deepestErrorDepth {
p.deepestError = err
p.deepestErrorDepth = maxInt(branch.PeekingLexer.Cursor(), branch.deepestErrorDepth)
}
Expand Down
10 changes: 1 addition & 9 deletions ebnf.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func buildEBNF(root bool, n node, seen map[node]bool, p *ebnfp, outp *[]*ebnfp)
p = &ebnfp{name: name}
*outp = append(*outp, p)
seen[n] = true
for i, next := range n.nodeMembers {
for i, next := range n.disjunction.nodes {
if i > 0 {
p.out += " | "
}
Expand Down Expand Up @@ -111,14 +111,6 @@ func buildEBNF(root bool, n node, seen map[node]bool, p *ebnfp, outp *[]*ebnfp)
case *reference:
p.out += "<" + strings.ToLower(n.identifier) + ">"

case *optional:
buildEBNF(false, n.node, seen, p, outp)
p.out += "?"

case *repetition:
buildEBNF(false, n.node, seen, p, outp)
p.out += "*"

case *negation:
p.out += "~"
buildEBNF(false, n.node, seen, p, outp)
Expand Down
9 changes: 6 additions & 3 deletions error.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,18 @@ func FormatError(err Error) string {
// This is useful for composing parsers in order to detect when a sub-parser has terminated.
type UnexpectedTokenError struct {
Unexpected lexer.Token
at node
Expect string
expectNode node // Usable instead of Expect, delays creating the string representation until necessary
}

func (u *UnexpectedTokenError) Error() string { return FormatError(u) }

func (u *UnexpectedTokenError) Message() string { // nolint: golint
var expected string
if u.at != nil {
expected = fmt.Sprintf(" (expected %s)", u.at)
if u.expectNode != nil {
expected = fmt.Sprintf(" (expected %s)", u.expectNode)
} else if u.Expect != "" {
expected = fmt.Sprintf(" (expected %s)", u.Expect)
}
return fmt.Sprintf("unexpected token %q%s", u.Unexpected, expected)
}
Expand Down
19 changes: 12 additions & 7 deletions error_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ import (
"testing"

require "github.com/alecthomas/assert/v2"

"github.com/alecthomas/participle/v2"
"github.com/alecthomas/participle/v2/lexer"
)

func TestErrorReporting(t *testing.T) {
type cls struct {
Visibility string `@"public"?`
Class string `"class" @Ident`
Visibility string `@"public"?`
Class string `"class" @Ident`
Bases []string `('(' @Ident (',' @Ident)+ ')')?`
}
type union struct {
Visibility string `@"public"?`
Expand All @@ -27,17 +29,20 @@ func TestErrorReporting(t *testing.T) {
}
p := mustTestParser[grammar](t, participle.UseLookahead(5))

ast, err := p.ParseString("", `public class A;`)
ast, err := p.ParseString("", `public class A(B, C); class D; public union A;`)
require.NoError(t, err)
require.Equal(t, &grammar{Decls: []*decl{
{Class: &cls{Visibility: "public", Class: "A"}},
{Class: &cls{Visibility: "public", Class: "A", Bases: []string{"B", "C"}}},
{Class: &cls{Class: "D"}},
{Union: &union{Visibility: "public", Union: "A"}},
}}, ast)
_, err = p.ParseString("", `public union A;`)
require.NoError(t, err)

_, err = p.ParseString("", `public struct Bar;`)
require.EqualError(t, err, `1:8: unexpected token "struct" (expected "union" <ident>)`)
_, err = p.ParseString("", `public class 1;`)
require.EqualError(t, err, `1:14: unexpected token "1" (expected <ident>)`)
require.EqualError(t, err, `1:14: unexpected token "1" (expected <ident> ("(" <ident> ("," <ident>)+ ")")?)`)
_, err = p.ParseString("", `public class A(B,C,);`)
require.EqualError(t, err, `1:20: unexpected token ")" (expected <ident>)`)
}

func TestErrorWrap(t *testing.T) {
Expand Down
7 changes: 5 additions & 2 deletions grammar.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func (g *generatorContext) addUnionDefs(defs []unionDef) error {
}
unionNode := &union{
unionDef: def,
nodeMembers: make([]node, 0, len(def.members)),
disjunction: disjunction{nodes: make([]node, 0, len(def.members))},
}
g.typeNodes[def.typ], unionNodes[i] = unionNode, unionNode
}
Expand All @@ -41,7 +41,7 @@ func (g *generatorContext) addUnionDefs(defs []unionDef) error {
if err != nil {
return err
}
unionNode.nodeMembers = append(unionNode.nodeMembers, memberNode)
unionNode.disjunction.nodes = append(unionNode.disjunction.nodes, memberNode)
}
}
return nil
Expand All @@ -61,6 +61,9 @@ func (g *generatorContext) addCustomDefs(defs []customDef) error {
func (g *generatorContext) parseType(t reflect.Type) (_ node, returnedError error) {
t = indirectType(t)
if n, ok := g.typeNodes[t]; ok {
if s, ok := n.(*strct); ok {
s.usages++
}
return n, nil
}
if t.Implements(parseableType) {
Expand Down
110 changes: 26 additions & 84 deletions nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,15 @@ func (c *custom) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.V
// @@ (for a union)
type union struct {
unionDef
nodeMembers []node
disjunction disjunction
}

func (u *union) String() string { return ebnf(u) }
func (u *union) GoString() string { return u.typ.Name() }

func (u *union) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
defer ctx.printTrace(u)()
temp := disjunction{u.nodeMembers}
vals, err := temp.Parse(ctx, parent)
vals, err := u.disjunction.Parse(ctx, parent)
if err != nil {
return nil, err
}
Expand All @@ -123,11 +122,13 @@ type strct struct {
tokensFieldIndex []int
posFieldIndex []int
endPosFieldIndex []int
usages int
}

func newStrct(typ reflect.Type) *strct {
s := &strct{
typ: typ,
typ: typ,
usages: 1,
}
field, ok := typ.FieldByName("Pos")
if ok && field.Type == positionType {
Expand Down Expand Up @@ -295,19 +296,18 @@ type lookaheadGroup struct {
negative bool
}

func (n *lookaheadGroup) String() string { return ebnf(n) }
func (n *lookaheadGroup) GoString() string { return "lookaheadGroup{}" }
func (l *lookaheadGroup) String() string { return ebnf(l) }
func (l *lookaheadGroup) GoString() string { return "lookaheadGroup{}" }

func (n *lookaheadGroup) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
defer ctx.printTrace(n)()
func (l *lookaheadGroup) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
defer ctx.printTrace(l)()
// Create a branch to avoid advancing the parser as any match will be discarded
branch := ctx.Branch()
out, err = n.expr.Parse(branch, parent)
out, err = l.expr.Parse(branch, parent)
matchedLookahead := err == nil && out != nil
expectingMatch := !n.negative
expectingMatch := !l.negative
if matchedLookahead != expectingMatch {
peek := ctx.Peek()
return nil, Errorf(peek.Pos, "unexpected '%s'", peek.Value)
return nil, &UnexpectedTokenError{Unexpected: *ctx.Peek()}
}
return []reflect.Value{}, nil // Empty match slice means a match, unlike nil
}
Expand Down Expand Up @@ -382,7 +382,7 @@ func (s *sequence) Parse(ctx *parseContext, parent reflect.Value) (out []reflect
return nil, nil
}
token := ctx.Peek()
return out, &UnexpectedTokenError{Unexpected: *token, at: n}
return out, &UnexpectedTokenError{Unexpected: *token, expectNode: n}
}
// Special-case for when children return an empty match.
// Appending an empty, non-nil slice to a nil slice returns a nil slice.
Expand Down Expand Up @@ -440,72 +440,6 @@ func (r *reference) Parse(ctx *parseContext, parent reflect.Value) (out []reflec
return []reflect.Value{reflect.ValueOf(token.Value)}, nil
}

// [ <expr> ] <sequence>
type optional struct {
node node
}

func (o *optional) String() string { return ebnf(o) }
func (o *optional) GoString() string { return "optional{}" }

func (o *optional) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
defer ctx.printTrace(o)()
branch := ctx.Branch()
out, err = o.node.Parse(branch, parent)
if err != nil {
// Optional part failed to match.
if ctx.Stop(err, branch) {
return out, err
}
} else {
ctx.Accept(branch)
}
if out == nil {
out = []reflect.Value{}
}
return out, nil
}

// { <expr> } <sequence>
type repetition struct {
node node
}

func (r *repetition) String() string { return ebnf(r) }
func (r *repetition) GoString() string { return "repetition{}" }

// Parse a repetition. Once a repetition is encountered it will always match, so grammars
// should ensure that branches are differentiated prior to the repetition.
func (r *repetition) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
defer ctx.printTrace(r)()
i := 0
for ; i < MaxIterations; i++ {
branch := ctx.Branch()
v, err := r.node.Parse(branch, parent)
out = append(out, v...)
if err != nil {
// Optional part failed to match.
if ctx.Stop(err, branch) {
return out, err
}
break
} else {
ctx.Accept(branch)
}
if v == nil {
break
}
}
if i >= MaxIterations {
t := ctx.Peek()
return nil, Errorf(t.Pos, "too many iterations of %s (> %d)", r, MaxIterations)
}
if out == nil {
out = []reflect.Value{}
}
return out, nil
}

// Match a token literal exactly "..."[:<type>].
type literal struct {
s string
Expand Down Expand Up @@ -556,7 +490,7 @@ func (n *negation) Parse(ctx *parseContext, parent reflect.Value) (out []reflect
out, err = n.node.Parse(branch, parent)
if out != nil && err == nil {
// out being non-nil means that what we don't want is actually here, so we report nomatch
return nil, Errorf(notEOF.Pos, "unexpected '%s'", notEOF.Value)
return nil, &UnexpectedTokenError{Unexpected: *notEOF}
}

// Just give the next token
Expand Down Expand Up @@ -591,15 +525,15 @@ func conform(t reflect.Type, values []reflect.Value) (out []reflect.Value, err e
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
n, err := strconv.ParseInt(v.String(), 0, sizeOfKind(kind))
if err != nil {
return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err)
return nil, err
}
v = reflect.New(t).Elem()
v.SetInt(n)

case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
n, err := strconv.ParseUint(v.String(), 0, sizeOfKind(kind))
if err != nil {
return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err)
return nil, err
}
v = reflect.New(t).Elem()
v.SetUint(n)
Expand All @@ -610,7 +544,7 @@ func conform(t reflect.Type, values []reflect.Value) (out []reflect.Value, err e
case reflect.Float32, reflect.Float64:
n, err := strconv.ParseFloat(v.String(), sizeOfKind(kind))
if err != nil {
return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err)
return nil, err
}
v = reflect.New(t).Elem()
v.SetFloat(n)
Expand Down Expand Up @@ -735,9 +669,14 @@ func setField(tokens []lexer.Token, strct reflect.Value, field structLexerField,
if err != nil {
return err
}
if len(fieldValue) == 0 {
return nil
}
accumulated := f.String()
for _, v := range fieldValue {
f.Set(reflect.ValueOf(f.String() + v.String()).Convert(f.Type()))
accumulated += v.String()
}
f.SetString(accumulated)
return nil
}

Expand All @@ -755,6 +694,9 @@ func setField(tokens []lexer.Token, strct reflect.Value, field structLexerField,
if err != nil {
return err
}
if len(fieldValue) == 0 {
return nil // Nothing to capture, can happen when trying to get a partial parse tree
}

fv := fieldValue[0]

Expand Down
Loading

0 comments on commit 1f05da7

Please sign in to comment.