-
Notifications
You must be signed in to change notification settings - Fork 211
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
go.exp: add packages ebnf, gotype, inotify, locale, ssa, utf8string, …
…winfsnotify. Packages moved from main repo, with minor adjustments so tests pass. See also https://golang.org/cl/7456047 R=golang-dev, minux.ma CC=golang-dev https://golang.org/cl/7463043
- Loading branch information
Showing
81 changed files
with
76,083 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,269 @@ | ||
// Copyright 2009 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
// Package ebnf is a library for EBNF grammars. The input is text ([]byte) | ||
// satisfying the following grammar (represented itself in EBNF): | ||
// | ||
// Production = name "=" [ Expression ] "." . | ||
// Expression = Alternative { "|" Alternative } . | ||
// Alternative = Term { Term } . | ||
// Term = name | token [ "…" token ] | Group | Option | Repetition . | ||
// Group = "(" Expression ")" . | ||
// Option = "[" Expression "]" . | ||
// Repetition = "{" Expression "}" . | ||
// | ||
// A name is a Go identifier, a token is a Go string, and comments | ||
// and white space follow the same rules as for the Go language. | ||
// Production names starting with an uppercase Unicode letter denote | ||
// non-terminal productions (i.e., productions which allow white-space | ||
// and comments between tokens); all other production names denote | ||
// lexical productions. | ||
// | ||
package ebnf | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"text/scanner" | ||
"unicode" | ||
"unicode/utf8" | ||
) | ||
|
||
// ---------------------------------------------------------------------------- | ||
// Error handling | ||
|
||
// errorList collects the errors reported while processing a grammar.
// It implements the error interface itself.
type errorList []error

// Err returns the list as an error value, or nil if the list is empty.
// Use it instead of returning the list directly so that an empty list
// does not end up as a non-nil error.
func (list errorList) Err() error {
	if len(list) > 0 {
		return list
	}
	return nil
}

// Error reports the first error in the list, followed by the number of
// remaining errors if there is more than one. An empty list yields
// "no errors".
func (list errorList) Error() string {
	n := len(list)
	if n == 0 {
		return "no errors"
	}
	if n == 1 {
		return list[0].Error()
	}
	return fmt.Sprintf("%s (and %d more errors)", list[0], n-1)
}
|
||
// newError returns an error whose text is msg prefixed by the
// formatted source position pos and a colon.
func newError(pos scanner.Position, msg string) error {
	text := fmt.Sprintf("%s: %s", pos, msg)
	return errors.New(text)
}
|
||
// ---------------------------------------------------------------------------- | ||
// Internal representation | ||
|
||
type (
	// An Expression node represents a production expression.
	Expression interface {
		// Pos is the position of the first character of the syntactic construct
		Pos() scanner.Position
	}

	// An Alternative node represents a non-empty list of alternative expressions.
	Alternative []Expression // x | y | z

	// A Sequence node represents a non-empty list of sequential expressions.
	Sequence []Expression // x y z

	// A Name node represents a production name.
	Name struct {
		StringPos scanner.Position
		String    string
	}

	// A Token node represents a literal.
	Token struct {
		StringPos scanner.Position
		String    string
	}

	// A Range node represents a range of characters.
	Range struct {
		Begin, End *Token // begin ... end
	}

	// A Group node represents a grouped expression.
	Group struct {
		Lparen scanner.Position
		Body   Expression // (body)
	}

	// An Option node represents an optional expression.
	Option struct {
		Lbrack scanner.Position
		Body   Expression // [body]
	}

	// A Repetition node represents a repeated expression.
	Repetition struct {
		Lbrace scanner.Position
		Body   Expression // {body}
	}

	// A Production node represents an EBNF production.
	Production struct {
		Name *Name
		Expr Expression
	}

	// A Bad node stands for pieces of source code that lead to a parse error.
	Bad struct {
		TokPos scanner.Position
		Error  string // parser error message
	}

	// A Grammar is a set of EBNF productions. The map
	// is indexed by production name.
	//
	Grammar map[string]*Production
)
|
||
func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative | ||
func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences | ||
func (x *Name) Pos() scanner.Position { return x.StringPos } | ||
func (x *Token) Pos() scanner.Position { return x.StringPos } | ||
func (x *Range) Pos() scanner.Position { return x.Begin.Pos() } | ||
func (x *Group) Pos() scanner.Position { return x.Lparen } | ||
func (x *Option) Pos() scanner.Position { return x.Lbrack } | ||
func (x *Repetition) Pos() scanner.Position { return x.Lbrace } | ||
func (x *Production) Pos() scanner.Position { return x.Name.Pos() } | ||
func (x *Bad) Pos() scanner.Position { return x.TokPos } | ||
|
||
// ---------------------------------------------------------------------------- | ||
// Grammar verification | ||
|
||
// isLexical reports whether name denotes a lexical production, that is,
// whether its first rune is not an uppercase Unicode letter. The empty
// name is reported as lexical.
func isLexical(name string) bool {
	first, _ := utf8.DecodeRuneInString(name)
	startsUpper := unicode.IsUpper(first)
	return !startsUpper
}
|
||
// A verifier holds the state of a single grammar verification run.
type verifier struct {
	errors   errorList     // errors reported so far, in the order they were found
	worklist []*Production // productions reached but not yet verified; used as a stack
	reached  Grammar       // set of productions reached from (and including) the root production
	grammar  Grammar       // the grammar being verified
}
|
||
func (v *verifier) error(pos scanner.Position, msg string) { | ||
v.errors = append(v.errors, newError(pos, msg)) | ||
} | ||
|
||
func (v *verifier) push(prod *Production) { | ||
name := prod.Name.String | ||
if _, found := v.reached[name]; !found { | ||
v.worklist = append(v.worklist, prod) | ||
v.reached[name] = prod | ||
} | ||
} | ||
|
||
func (v *verifier) verifyChar(x *Token) rune { | ||
s := x.String | ||
if utf8.RuneCountInString(s) != 1 { | ||
v.error(x.Pos(), "single char expected, found "+s) | ||
return 0 | ||
} | ||
ch, _ := utf8.DecodeRuneInString(s) | ||
return ch | ||
} | ||
|
||
func (v *verifier) verifyExpr(expr Expression, lexical bool) { | ||
switch x := expr.(type) { | ||
case nil: | ||
// empty expression | ||
case Alternative: | ||
for _, e := range x { | ||
v.verifyExpr(e, lexical) | ||
} | ||
case Sequence: | ||
for _, e := range x { | ||
v.verifyExpr(e, lexical) | ||
} | ||
case *Name: | ||
// a production with this name must exist; | ||
// add it to the worklist if not yet processed | ||
if prod, found := v.grammar[x.String]; found { | ||
v.push(prod) | ||
} else { | ||
v.error(x.Pos(), "missing production "+x.String) | ||
} | ||
// within a lexical production references | ||
// to non-lexical productions are invalid | ||
if lexical && !isLexical(x.String) { | ||
v.error(x.Pos(), "reference to non-lexical production "+x.String) | ||
} | ||
case *Token: | ||
// nothing to do for now | ||
case *Range: | ||
i := v.verifyChar(x.Begin) | ||
j := v.verifyChar(x.End) | ||
if i >= j { | ||
v.error(x.Pos(), "decreasing character range") | ||
} | ||
case *Group: | ||
v.verifyExpr(x.Body, lexical) | ||
case *Option: | ||
v.verifyExpr(x.Body, lexical) | ||
case *Repetition: | ||
v.verifyExpr(x.Body, lexical) | ||
case *Bad: | ||
v.error(x.Pos(), x.Error) | ||
default: | ||
panic(fmt.Sprintf("internal error: unexpected type %T", expr)) | ||
} | ||
} | ||
|
||
func (v *verifier) verify(grammar Grammar, start string) { | ||
// find root production | ||
root, found := grammar[start] | ||
if !found { | ||
var noPos scanner.Position | ||
v.error(noPos, "no start production "+start) | ||
return | ||
} | ||
|
||
// initialize verifier | ||
v.worklist = v.worklist[0:0] | ||
v.reached = make(Grammar) | ||
v.grammar = grammar | ||
|
||
// work through the worklist | ||
v.push(root) | ||
for { | ||
n := len(v.worklist) - 1 | ||
if n < 0 { | ||
break | ||
} | ||
prod := v.worklist[n] | ||
v.worklist = v.worklist[0:n] | ||
v.verifyExpr(prod.Expr, isLexical(prod.Name.String)) | ||
} | ||
|
||
// check if all productions were reached | ||
if len(v.reached) < len(v.grammar) { | ||
for name, prod := range v.grammar { | ||
if _, found := v.reached[name]; !found { | ||
v.error(prod.Pos(), name+" is unreachable") | ||
} | ||
} | ||
} | ||
} | ||
|
||
// Verify checks that: | ||
// - all productions used are defined | ||
// - all productions defined are used when beginning at start | ||
// - lexical productions refer only to other lexical productions | ||
// | ||
// Position information is interpreted relative to the file set fset. | ||
// | ||
func Verify(grammar Grammar, start string) error { | ||
var v verifier | ||
v.verify(grammar, start) | ||
return v.errors.Err() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
// Copyright 2009 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package ebnf | ||
|
||
import ( | ||
"bytes" | ||
"testing" | ||
) | ||
|
||
// goodGrammars lists grammar sources that checkGood expects to pass
// both Parse and Verify with "Program" as the start production.
var goodGrammars = []string{
	`Program = .`,

	`Program = foo .
foo = "foo" .`,

	`Program = "a" | "b" "c" .`,

	`Program = "a" … "z" .`,

	`Program = Song .
Song = { Note } .
Note = Do | (Re | Mi | Fa | So | La) | Ti .
Do = "c" .
Re = "d" .
Mi = "e" .
Fa = "f" .
So = "g" .
La = "a" .
Ti = ti .
ti = "b" .`,
}
|
||
// badGrammars lists grammar sources that checkBad expects Parse to reject.
var badGrammars = []string{
	`Program = | .`,
	`Program = | b .`,
	`Program = a … b .`,
	`Program = "a" … .`,
	`Program = … "b" .`,
	`Program = () .`,
	`Program = [] .`,
	`Program = {} .`,
}
|
||
func checkGood(t *testing.T, src string) { | ||
grammar, err := Parse("", bytes.NewBuffer([]byte(src))) | ||
if err != nil { | ||
t.Errorf("Parse(%s) failed: %v", src, err) | ||
return | ||
} | ||
if err = Verify(grammar, "Program"); err != nil { | ||
t.Errorf("Verify(%s) failed: %v", src, err) | ||
} | ||
} | ||
|
||
func checkBad(t *testing.T, src string) { | ||
_, err := Parse("", bytes.NewBuffer([]byte(src))) | ||
if err == nil { | ||
t.Errorf("Parse(%s) should have failed", src) | ||
} | ||
} | ||
|
||
func TestGrammars(t *testing.T) { | ||
for _, src := range goodGrammars { | ||
checkGood(t, src) | ||
} | ||
for _, src := range badGrammars { | ||
checkBad(t, src) | ||
} | ||
} |
Oops, something went wrong.