Skip to content

Commit 4d6ae14

Browse files
committed
Tests for Lexer
1 parent 1e62f18 commit 4d6ae14

File tree

3 files changed

+390
-25
lines changed

3 files changed

+390
-25
lines changed

parser/lexer.go

Lines changed: 94 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ type yyLex struct {
3535
line string // current line being parsed
3636
eof bool // flag to show EOF was read
3737
error bool // set if an error has occurred
38+
errorString string // the string of the error
3839
indentStack []int // indent stack to control INDENT / DEDENT tokens
3940
state int // current state of state machine
4041
currentIndent string // whitespace at start of current line
@@ -71,7 +72,7 @@ func (x *yyLex) refill() {
7172
}
7273

7374
// Finds the length of a space and tab separated string
74-
func (x *yyLex) countIndent(s string) int {
75+
func countIndent(s string) int {
7576
if len(s) == 0 {
7677
return 0
7778
}
@@ -236,20 +237,71 @@ const (
236237
isEof
237238
)
238239

240+
// LexToken is a single lexer token paired with its optional value.
241+
type LexToken struct {
242+
token int // numeric token code
243+
value py.Object // value attached to the token; nil when the token carries none
244+
}
245+
246+
// Convert the yySymType and token into a LexToken
247+
func newLexToken(token int, yylval *yySymType) (lt LexToken) {
248+
lt.token = token
249+
if token == NAME {
250+
lt.value = py.String(yylval.str)
251+
} else if token == STRING || token == NUMBER {
252+
lt.value = yylval.obj
253+
} else {
254+
lt.value = nil
255+
}
256+
return
257+
}
258+
259+
// String a LexToken
260+
func (lt *LexToken) String() string {
261+
name := tokenToString[lt.token]
262+
if lt.value == nil {
263+
return fmt.Sprintf("%q (%d)", name, lt.token)
264+
}
265+
return fmt.Sprintf("%q (%d) = %T{%v}", name, lt.token, lt.value, lt.value)
266+
}
267+
268+
// LexTokens is a slice of LexToken values.
269+
type LexTokens []LexToken
270+
271+
// Compare two LexTokens
272+
func (as LexTokens) Eq(bs []LexToken) bool {
273+
if len(as) != len(bs) {
274+
return false
275+
}
276+
for i := range as {
277+
a := as[i]
278+
b := bs[i]
279+
if a != b {
280+
return false
281+
}
282+
}
283+
return true
284+
}
285+
286+
// String a LexTokens
287+
func (lts LexTokens) String() string {
288+
buf := new(bytes.Buffer)
289+
buf.WriteString("[")
290+
for i := range lts {
291+
lt := lts[i]
292+
buf.WriteString("{")
293+
buf.WriteString(lt.String())
294+
buf.WriteString("}, ")
295+
}
296+
buf.WriteString("]")
297+
return buf.String()
298+
}
299+
239300
// The parser calls this method to get each new token. This
240301
// implementation returns operators and NUM.
241302
func (x *yyLex) Lex(yylval *yySymType) (ret int) {
242303
if yyDebug >= 2 {
243-
defer func() {
244-
name := tokenToString[ret]
245-
if ret == NAME {
246-
fmt.Printf("LEX> %q (%d) = %q\n", name, ret, yylval.str)
247-
} else if ret == STRING || ret == NUMBER {
248-
fmt.Printf("LEX> %q (%d) = %T{%v}\n", name, ret, yylval.obj, yylval.obj)
249-
} else {
250-
fmt.Printf("LEX> %q (%d) \n", name, ret)
251-
}
252-
}()
304+
defer func() { fmt.Printf("LEX> %v\n", newLexToken(ret, yylval)) }()
253305
}
254306

255307
for {
@@ -286,7 +338,7 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
286338
continue
287339
}
288340
// See if indent has changed and issue INDENT / DEDENT
289-
indent := x.countIndent(x.currentIndent)
341+
indent := countIndent(x.currentIndent)
290342
indentStackTop := x.indentStack[len(x.indentStack)-1]
291343
switch {
292344
case indent > indentStackTop:
@@ -329,7 +381,8 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
329381
// Check if continuation character
330382
if x.line[0] == '\\' && (len(x.line) <= 1 || x.line[1] == '\n') {
331383
if x.eof {
332-
return eof
384+
x.state = checkEof
385+
continue
333386
}
334387
x.refill()
335388
x.state = parseTokens
@@ -385,7 +438,7 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
385438
}
386439

387440
// Nothing we recognise found
388-
x.Error("Syntax error")
441+
x.Error("invalid syntax")
389442
return eof
390443
case checkEof:
391444
if x.eof {
@@ -567,6 +620,7 @@ isNumber:
567620
} else {
568621
// Discard numbers with leading 0 except all 0s
569622
if illegalDecimalInteger.FindString(x.line) != "" {
623+
x.Error("illegal decimal with leading zero")
570624
return eofError, nil
571625
}
572626
value = py.IntNew(py.IntType, py.Tuple{py.String(s), py.Int(10)}, nil)
@@ -707,6 +761,7 @@ foundEndOfString:
707761
// The parser calls this method on a parse error.
708762
func (x *yyLex) Error(s string) {
709763
x.error = true
764+
x.errorString = s
710765
if yyDebug >= 1 {
711766
log.Printf("Parse error: %s", s)
712767
log.Printf("Parse buffer %q", x.line)
@@ -719,6 +774,14 @@ func (x *yyLex) Errorf(format string, a ...interface{}) {
719774
x.Error(fmt.Sprintf(format, a...))
720775
}
721776

777+
// Returns an python error for the current yyLex
778+
func (x *yyLex) ErrorReturn() error {
779+
if x.error {
780+
return py.ExceptionNewf(py.SyntaxError, "Syntax Error: %s", x.errorString)
781+
}
782+
return nil
783+
}
784+
722785
// Set the debug level 0 = off, 4 = max
723786
func SetDebug(level int) {
724787
yyDebug = level
@@ -728,24 +791,31 @@ func SetDebug(level int) {
728791
// Parse creates a lexer over in, runs yyParse on it and returns the
// error recorded on the lexer, if any.
func Parse(in io.Reader) error {
729792
lex := NewLex(in)
730793
yyParse(lex)
731-
if lex.error {
732-
return py.ExceptionNewf(py.SyntaxError, "Syntax Error")
733-
}
734-
return nil
794+
return lex.ErrorReturn() // nil unless the lexer flagged an error
735795
}
736796

737-
// Lex a file only
738-
func Lex(in io.Reader) error {
797+
// Parse a string
798+
func ParseString(in string) error {
799+
return Parse(bytes.NewBufferString(in))
800+
}
801+
802+
// Lex runs only the lexer over in, returning the sequence of tokens
// produced before eof together with any error recorded on the lexer.
803+
func Lex(in io.Reader) (lts LexTokens, err error) {
739804
lex := NewLex(in)
740805
yylval := yySymType{}
741806
for {
742807
ret := lex.Lex(&yylval)
743808
if ret == eof {
744809
break
745810
}
811+
lt := newLexToken(ret, &yylval) // capture the token together with its value
812+
lts = append(lts, lt)
746813
}
747-
if lex.error {
748-
return py.ExceptionNewf(py.SyntaxError, "Syntax Error")
749-
}
750-
return nil
814+
err = lex.ErrorReturn() // nil unless the lexer flagged an error
815+
return
816+
}
817+
818+
// Lex a string
819+
func LexString(in string) (lts LexTokens, err error) {
820+
return Lex(bytes.NewBufferString(in))
751821
}

0 commit comments

Comments
 (0)