@@ -35,6 +35,7 @@ type yyLex struct {
35
35
line string // current line being parsed
36
36
eof bool // flag to show EOF was read
37
37
error bool // set if an error has ocurred
38
+ errorString string // the string of the error
38
39
indentStack []int // indent stack to control INDENT / DEDENT tokens
39
40
state int // current state of state machine
40
41
currentIndent string // whitespace at start of current line
@@ -71,7 +72,7 @@ func (x *yyLex) refill() {
71
72
}
72
73
73
74
// Finds the length of a space and tab separated string
74
- func ( x * yyLex ) countIndent (s string ) int {
75
+ func countIndent (s string ) int {
75
76
if len (s ) == 0 {
76
77
return 0
77
78
}
@@ -236,20 +237,71 @@ const (
236
237
isEof
237
238
)
238
239
240
+ // A Token with value
241
+ type LexToken struct {
242
+ token int
243
+ value py.Object
244
+ }
245
+
246
+ // Convert the yySymType and token into a LexToken
247
+ func newLexToken (token int , yylval * yySymType ) (lt LexToken ) {
248
+ lt .token = token
249
+ if token == NAME {
250
+ lt .value = py .String (yylval .str )
251
+ } else if token == STRING || token == NUMBER {
252
+ lt .value = yylval .obj
253
+ } else {
254
+ lt .value = nil
255
+ }
256
+ return
257
+ }
258
+
259
+ // String a LexToken
260
+ func (lt * LexToken ) String () string {
261
+ name := tokenToString [lt .token ]
262
+ if lt .value == nil {
263
+ return fmt .Sprintf ("%q (%d)" , name , lt .token )
264
+ }
265
+ return fmt .Sprintf ("%q (%d) = %T{%v}" , name , lt .token , lt .value , lt .value )
266
+ }
267
+
268
+ // An slice of LexToken~s
269
+ type LexTokens []LexToken
270
+
271
+ // Compare two LexTokens
272
+ func (as LexTokens ) Eq (bs []LexToken ) bool {
273
+ if len (as ) != len (bs ) {
274
+ return false
275
+ }
276
+ for i := range as {
277
+ a := as [i ]
278
+ b := bs [i ]
279
+ if a != b {
280
+ return false
281
+ }
282
+ }
283
+ return true
284
+ }
285
+
286
+ // String a LexTokens
287
+ func (lts LexTokens ) String () string {
288
+ buf := new (bytes.Buffer )
289
+ buf .WriteString ("[" )
290
+ for i := range lts {
291
+ lt := lts [i ]
292
+ buf .WriteString ("{" )
293
+ buf .WriteString (lt .String ())
294
+ buf .WriteString ("}, " )
295
+ }
296
+ buf .WriteString ("]" )
297
+ return buf .String ()
298
+ }
299
+
239
300
// The parser calls this method to get each new token. This
240
301
// implementation returns operators and NUM.
241
302
func (x * yyLex ) Lex (yylval * yySymType ) (ret int ) {
242
303
if yyDebug >= 2 {
243
- defer func () {
244
- name := tokenToString [ret ]
245
- if ret == NAME {
246
- fmt .Printf ("LEX> %q (%d) = %q\n " , name , ret , yylval .str )
247
- } else if ret == STRING || ret == NUMBER {
248
- fmt .Printf ("LEX> %q (%d) = %T{%v}\n " , name , ret , yylval .obj , yylval .obj )
249
- } else {
250
- fmt .Printf ("LEX> %q (%d) \n " , name , ret )
251
- }
252
- }()
304
+ defer func () { fmt .Printf ("LEX> %v\n " , newLexToken (ret , yylval )) }()
253
305
}
254
306
255
307
for {
@@ -286,7 +338,7 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
286
338
continue
287
339
}
288
340
// See if indent has changed and issue INDENT / DEDENT
289
- indent := x . countIndent (x .currentIndent )
341
+ indent := countIndent (x .currentIndent )
290
342
indentStackTop := x .indentStack [len (x .indentStack )- 1 ]
291
343
switch {
292
344
case indent > indentStackTop :
@@ -329,7 +381,8 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
329
381
// Check if continuation character
330
382
if x .line [0 ] == '\\' && (len (x .line ) <= 1 || x .line [1 ] == '\n' ) {
331
383
if x .eof {
332
- return eof
384
+ x .state = checkEof
385
+ continue
333
386
}
334
387
x .refill ()
335
388
x .state = parseTokens
@@ -385,7 +438,7 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
385
438
}
386
439
387
440
// Nothing we recognise found
388
- x .Error ("Syntax error " )
441
+ x .Error ("invalid syntax " )
389
442
return eof
390
443
case checkEof :
391
444
if x .eof {
@@ -567,6 +620,7 @@ isNumber:
567
620
} else {
568
621
// Discard numbers with leading 0 except all 0s
569
622
if illegalDecimalInteger .FindString (x .line ) != "" {
623
+ x .Error ("illegal decimal with leading zero" )
570
624
return eofError , nil
571
625
}
572
626
value = py .IntNew (py .IntType , py.Tuple {py .String (s ), py .Int (10 )}, nil )
@@ -707,6 +761,7 @@ foundEndOfString:
707
761
// The parser calls this method on a parse error.
708
762
func (x * yyLex ) Error (s string ) {
709
763
x .error = true
764
+ x .errorString = s
710
765
if yyDebug >= 1 {
711
766
log .Printf ("Parse error: %s" , s )
712
767
log .Printf ("Parse buffer %q" , x .line )
@@ -719,6 +774,14 @@ func (x *yyLex) Errorf(format string, a ...interface{}) {
719
774
x .Error (fmt .Sprintf (format , a ... ))
720
775
}
721
776
777
+ // Returns an python error for the current yyLex
778
+ func (x * yyLex ) ErrorReturn () error {
779
+ if x .error {
780
+ return py .ExceptionNewf (py .SyntaxError , "Syntax Error: %s" , x .errorString )
781
+ }
782
+ return nil
783
+ }
784
+
722
785
// Set the debug level 0 = off, 4 = max
723
786
func SetDebug (level int ) {
724
787
yyDebug = level
@@ -728,24 +791,31 @@ func SetDebug(level int) {
728
791
func Parse (in io.Reader ) error {
729
792
lex := NewLex (in )
730
793
yyParse (lex )
731
- if lex .error {
732
- return py .ExceptionNewf (py .SyntaxError , "Syntax Error" )
733
- }
734
- return nil
794
+ return lex .ErrorReturn ()
735
795
}
736
796
737
- // Lex a file only
738
- func Lex (in io.Reader ) error {
797
+ // Parse a string
798
+ func ParseString (in string ) error {
799
+ return Parse (bytes .NewBufferString (in ))
800
+ }
801
+
802
+ // Lex a file only, returning a sequence of tokens
803
+ func Lex (in io.Reader ) (lts LexTokens , err error ) {
739
804
lex := NewLex (in )
740
805
yylval := yySymType {}
741
806
for {
742
807
ret := lex .Lex (& yylval )
743
808
if ret == eof {
744
809
break
745
810
}
811
+ lt := newLexToken (ret , & yylval )
812
+ lts = append (lts , lt )
746
813
}
747
- if lex .error {
748
- return py .ExceptionNewf (py .SyntaxError , "Syntax Error" )
749
- }
750
- return nil
814
+ err = lex .ErrorReturn ()
815
+ return
816
+ }
817
+
818
+ // Lex a string
819
+ func LexString (in string ) (lts LexTokens , err error ) {
820
+ return Lex (bytes .NewBufferString (in ))
751
821
}
0 commit comments