Skip to content

Commit

Permalink
go.exp: add packages ebnf, gotype, inotify, locale, ssa, utf8string, …
Browse files Browse the repository at this point in the history
…winfsnotify.

Packages moved from main repo, with minor adjustments so tests pass.

See also https://golang.org/cl/7456047

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7463043
  • Loading branch information
robpike committed Mar 2, 2013
1 parent a083aa8 commit b8afd77
Show file tree
Hide file tree
Showing 81 changed files with 76,083 additions and 0 deletions.
269 changes: 269 additions & 0 deletions ebnf/ebnf.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package ebnf is a library for EBNF grammars. The input is text ([]byte)
// satisfying the following grammar (represented itself in EBNF):
//
// Production = name "=" [ Expression ] "." .
// Expression = Alternative { "|" Alternative } .
// Alternative = Term { Term } .
// Term = name | token [ "…" token ] | Group | Option | Repetition .
// Group = "(" Expression ")" .
// Option = "[" Expression "]" .
// Repetition = "{" Expression "}" .
//
// A name is a Go identifier, a token is a Go string, and comments
// and white space follow the same rules as for the Go language.
// Production names starting with an uppercase Unicode letter denote
// non-terminal productions (i.e., productions which allow white-space
// and comments between tokens); all other production names denote
// lexical productions.
//
package ebnf

import (
"errors"
"fmt"
"text/scanner"
"unicode"
"unicode/utf8"
)

// ----------------------------------------------------------------------------
// Error handling

type errorList []error

func (list errorList) Err() error {
if len(list) == 0 {
return nil
}
return list
}

func (list errorList) Error() string {
switch len(list) {
case 0:
return "no errors"
case 1:
return list[0].Error()
}
return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
}

func newError(pos scanner.Position, msg string) error {
return errors.New(fmt.Sprintf("%s: %s", pos, msg))
}

// ----------------------------------------------------------------------------
// Internal representation

type (
// An Expression node represents a production expression.
Expression interface {
// Pos is the position of the first character of the syntactic construct
Pos() scanner.Position
}

// An Alternative node represents a non-empty list of alternative expressions.
Alternative []Expression // x | y | z

// A Sequence node represents a non-empty list of sequential expressions.
Sequence []Expression // x y z

// A Name node represents a production name.
Name struct {
StringPos scanner.Position
String string
}

// A Token node represents a literal.
Token struct {
StringPos scanner.Position
String string
}

// A List node represents a range of characters.
Range struct {
Begin, End *Token // begin ... end
}

// A Group node represents a grouped expression.
Group struct {
Lparen scanner.Position
Body Expression // (body)
}

// An Option node represents an optional expression.
Option struct {
Lbrack scanner.Position
Body Expression // [body]
}

// A Repetition node represents a repeated expression.
Repetition struct {
Lbrace scanner.Position
Body Expression // {body}
}

// A Production node represents an EBNF production.
Production struct {
Name *Name
Expr Expression
}

// A Bad node stands for pieces of source code that lead to a parse error.
Bad struct {
TokPos scanner.Position
Error string // parser error message
}

// A Grammar is a set of EBNF productions. The map
// is indexed by production name.
//
Grammar map[string]*Production
)

func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences
func (x *Name) Pos() scanner.Position { return x.StringPos }
func (x *Token) Pos() scanner.Position { return x.StringPos }
func (x *Range) Pos() scanner.Position { return x.Begin.Pos() }
func (x *Group) Pos() scanner.Position { return x.Lparen }
func (x *Option) Pos() scanner.Position { return x.Lbrack }
func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
func (x *Bad) Pos() scanner.Position { return x.TokPos }

// ----------------------------------------------------------------------------
// Grammar verification

func isLexical(name string) bool {
ch, _ := utf8.DecodeRuneInString(name)
return !unicode.IsUpper(ch)
}

type verifier struct {
errors errorList
worklist []*Production
reached Grammar // set of productions reached from (and including) the root production
grammar Grammar
}

func (v *verifier) error(pos scanner.Position, msg string) {
v.errors = append(v.errors, newError(pos, msg))
}

func (v *verifier) push(prod *Production) {
name := prod.Name.String
if _, found := v.reached[name]; !found {
v.worklist = append(v.worklist, prod)
v.reached[name] = prod
}
}

func (v *verifier) verifyChar(x *Token) rune {
s := x.String
if utf8.RuneCountInString(s) != 1 {
v.error(x.Pos(), "single char expected, found "+s)
return 0
}
ch, _ := utf8.DecodeRuneInString(s)
return ch
}

func (v *verifier) verifyExpr(expr Expression, lexical bool) {
switch x := expr.(type) {
case nil:
// empty expression
case Alternative:
for _, e := range x {
v.verifyExpr(e, lexical)
}
case Sequence:
for _, e := range x {
v.verifyExpr(e, lexical)
}
case *Name:
// a production with this name must exist;
// add it to the worklist if not yet processed
if prod, found := v.grammar[x.String]; found {
v.push(prod)
} else {
v.error(x.Pos(), "missing production "+x.String)
}
// within a lexical production references
// to non-lexical productions are invalid
if lexical && !isLexical(x.String) {
v.error(x.Pos(), "reference to non-lexical production "+x.String)
}
case *Token:
// nothing to do for now
case *Range:
i := v.verifyChar(x.Begin)
j := v.verifyChar(x.End)
if i >= j {
v.error(x.Pos(), "decreasing character range")
}
case *Group:
v.verifyExpr(x.Body, lexical)
case *Option:
v.verifyExpr(x.Body, lexical)
case *Repetition:
v.verifyExpr(x.Body, lexical)
case *Bad:
v.error(x.Pos(), x.Error)
default:
panic(fmt.Sprintf("internal error: unexpected type %T", expr))
}
}

func (v *verifier) verify(grammar Grammar, start string) {
// find root production
root, found := grammar[start]
if !found {
var noPos scanner.Position
v.error(noPos, "no start production "+start)
return
}

// initialize verifier
v.worklist = v.worklist[0:0]
v.reached = make(Grammar)
v.grammar = grammar

// work through the worklist
v.push(root)
for {
n := len(v.worklist) - 1
if n < 0 {
break
}
prod := v.worklist[n]
v.worklist = v.worklist[0:n]
v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
}

// check if all productions were reached
if len(v.reached) < len(v.grammar) {
for name, prod := range v.grammar {
if _, found := v.reached[name]; !found {
v.error(prod.Pos(), name+" is unreachable")
}
}
}
}

// Verify checks that:
// - all productions used are defined
// - all productions defined are used when beginning at start
// - lexical productions refer only to other lexical productions
//
// Position information is interpreted relative to the file set fset.
//
func Verify(grammar Grammar, start string) error {
var v verifier
v.verify(grammar, start)
return v.errors.Err()
}
71 changes: 71 additions & 0 deletions ebnf/ebnf_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ebnf

import (
"bytes"
"testing"
)

var goodGrammars = []string{
`Program = .`,

`Program = foo .
foo = "foo" .`,

`Program = "a" | "b" "c" .`,

`Program = "a" … "z" .`,

`Program = Song .
Song = { Note } .
Note = Do | (Re | Mi | Fa | So | La) | Ti .
Do = "c" .
Re = "d" .
Mi = "e" .
Fa = "f" .
So = "g" .
La = "a" .
Ti = ti .
ti = "b" .`,
}

var badGrammars = []string{
`Program = | .`,
`Program = | b .`,
`Program = a … b .`,
`Program = "a" … .`,
`Program = … "b" .`,
`Program = () .`,
`Program = [] .`,
`Program = {} .`,
}

func checkGood(t *testing.T, src string) {
grammar, err := Parse("", bytes.NewBuffer([]byte(src)))
if err != nil {
t.Errorf("Parse(%s) failed: %v", src, err)
return
}
if err = Verify(grammar, "Program"); err != nil {
t.Errorf("Verify(%s) failed: %v", src, err)
}
}

func checkBad(t *testing.T, src string) {
_, err := Parse("", bytes.NewBuffer([]byte(src)))
if err == nil {
t.Errorf("Parse(%s) should have failed", src)
}
}

func TestGrammars(t *testing.T) {
for _, src := range goodGrammars {
checkGood(t, src)
}
for _, src := range badGrammars {
checkBad(t, src)
}
}
Loading

0 comments on commit b8afd77

Please sign in to comment.