Skip to content

Commit f1a6795

Browse files
committed
Fix lexer
1 parent 9863c76 commit f1a6795

File tree

1 file changed

+122
-96
lines changed

1 file changed

+122
-96
lines changed

exercises/TeaC/teac.l

Lines changed: 122 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -2,136 +2,162 @@
22

33
%{
44
#include <stdio.h>
5+
#include "teac.tab.h"
56
extern int yylineno;
67
extern int yyerror();
8+
extern FILE *yyin;
79
%}
810

9-
10-
/* KEYWORDS
11+
/* Keywords */
12+
INT_TYPE "int"
13+
REAL_TYPE "real"
14+
BOOL_TYPE "bool"
15+
STRING_TYPE "string"
16+
TRUE_KW "true"
17+
FALSE_KW "false"
18+
IF_KW "if"
19+
THEN_KW "then"
20+
ELSE_KW "else"
21+
FI_KW "fi"
22+
WHILE_KW "while"
23+
LOOP_KW "loop"
24+
POOL_KW "pool"
25+
CONST_KW "const"
26+
LET_KW "let"
27+
RETURN_KW "return"
28+
START_KW "start"
1129

1230
/* identifiers */
1331
/* One letter followed by any mix of letters, digits and underscores, so that
   names such as a1b or my_var are scanned as a single identifier. */
ID [a-zA-Z][0-9a-zA-Z_]*
1432

1533
/* integer positive constant */
16-
CONST_INTEGER "0"|[1-9][0-9]*
34+
CONST_INTEGER_NUM "0"|[1-9][0-9]*
1735

1836
/* real positive constant */
1937
INTEGER_PART (0{1}|[0-9]+)
2038
FRACTIONAL_PART "."[0-9]*
2139
/* The whole exponent is optional, but when present it needs the e/E marker,
   an optional sign and at least one digit (0 included). Making the marker
   itself optional would let "1.5-3" be scanned as one real constant. */
EXPONENT_PART ([eE][+-]?[0-9]+)?
22-
REAL {INTEGER_PART}{FRACTIONAL_PART}{EXPONENT_PART}
40+
REAL_NUM {INTEGER_PART}{FRACTIONAL_PART}{EXPONENT_PART}
2341

2442
/* string constant */
2543
STRING \"[ -~]*\"
2644

2745
/* operators */
28-
EQ_OP "="
29-
LT_OP "<"
30-
LTEQ_OP "<="
31-
NE_OP "!="
32-
PLUS_OP "+"
33-
MINUS_OP "-"
34-
AND_OP "and"|"AND"
35-
OR_OP "or"|"OR"
36-
NOT_OP "not"|"NOT"
46+
EQ_OP "="
47+
LT_OP "<"
48+
LTEQ_OP "<="
49+
NEQ_OP "!="
50+
AND_OP "and"
51+
OR_OP "or"
52+
NOT_OP "not"
53+
PLUS_OP "+"
54+
MINUS_OP "-"
55+
MUL_OP "*"
56+
DIV_OP "/"
57+
MOD_OP "%"
58+
ASSIGN_OP "<-"
3759

3860
/* delimiters */
39-
SEMI ";"
40-
OP "("
41-
CP ")"
42-
COMMA ","
43-
OB "["
44-
CB "]"
45-
OCB "{"
46-
CCB "}"
47-
ASSIGN "<-"
48-
COLON ":"
49-
ARROW_OP "=>"
50-
51-
/* multiline comment */
52-
%x ML_COMMENT
53-
61+
SEMI ";"
62+
LEFT_PAR "("
63+
RIGHT_PAR ")"
64+
COMMA ","
65+
LEFT_BRACKET "["
66+
RIGHT_BRACKET "]"
67+
LEFT_CURLY_BRACKET "{"
68+
RIGHT_CURLY_BRACKET "}"
69+
ARROW_DELIMITER "=>"
70+
COLON ":"
5471

5572
/* Whitespaces */
5673
WHITESPACE [ \t\n\r]+
5774

75+
/* multi line comment */
76+
ML_COMMENT "(*"
77+
%x ML_COMMENT
78+
79+
/* single line comment */
80+
SL_COMMENT "--".*
81+
82+
/* new line */
83+
NEW_LINE "\n"
84+
5885
%%
5986

60-
"int" { printf("%d: Token: INT_KW: %s\n", yylineno, yytext); }
61-
"real" { printf("%d: Token: REAL_KW: %s\n", yylineno, yytext); }
62-
"bool" { printf("%d: Token: BOOL_KW: %s\n", yylineno, yytext); }
63-
"string" { printf("%d: Token: STRING_KW: %s\n", yylineno, yytext); }
64-
"true" { printf("%d: Token: TRUE_KW: %s\n", yylineno, yytext); }
65-
"false" { printf("%d: Token: FALSE_KW: %s\n", yylineno, yytext); }
66-
"if" { printf("%d: Token: IF_KW: %s\n", yylineno, yytext); }
67-
"then" { printf("%d: Token: THEN_KW: %s\n", yylineno, yytext); }
68-
"else" { printf("%d: Token: ELSE_KW: %s\n", yylineno, yytext); }
69-
"fi" { printf("%d: Token: FI_KW: %s\n", yylineno, yytext); }
70-
"while" { printf("%d: Token: WHILE_KW: %s\n", yylineno, yytext); }
71-
"loop" { printf("%d: Token: LOOP_KW: %s\n", yylineno, yytext); }
72-
"pool" { printf("%d: Token: POOL_KW: %s\n", yylineno, yytext); }
73-
"const" { printf("%d: Token: CONST_KW: %s\n", yylineno, yytext); }
74-
"let" { printf("%d: Token: LET_KW: %s\n", yylineno, yytext); }
75-
"return" { printf("%d: Token: RETURN_KW: %s\n", yylineno, yytext); }
76-
"start" { printf("%d: Token: START_KW: %s\n", yylineno, yytext); }
77-
78-
{CONST_INTEGER} { printf("%d: Token CONST_INTEGER: %s\n", yylineno, yytext); }
79-
80-
{REAL} { printf("%d: Token REAL: %s\n", yylineno, yytext); }
81-
82-
{STRING} { printf("%d: Token: STRING: %s\n", yylineno, yytext); }
83-
84-
{EQ_OP} { printf("%d: Token EQ_OP: %s\n", yylineno, yytext); }
85-
{LT_OP} { printf("%d: Token LT_OP: %s\n", yylineno, yytext); }
86-
{LTEQ_OP} { printf("%d: Token LTEQ_OP: %s\n", yylineno, yytext); }
87-
{NE_OP} { printf("%d: Token NE_OP: %s\n", yylineno, yytext); }
88-
{PLUS_OP} { printf("%d: Token PLUS_OP: %s\n", yylineno, yytext); }
89-
{MINUS_OP} { printf("%d: Token MINUS_OP: %s\n", yylineno, yytext); }
90-
{AND_OP} { printf("%d: Token AND_OP: %s\n", yylineno, yytext); }
91-
{OR_OP} { printf("%d: Token OR_OP: %s\n", yylineno, yytext); }
92-
{NOT_OP} { printf("%d: Token NOT_OP: %s\n", yylineno, yytext); }
93-
94-
{ID} { printf("%d: Token: IDENTIFIER: %s\n", yylineno, yytext); }
95-
96-
{SEMI} { printf("%d: Token SEMI: %s\n", yylineno, yytext); }
97-
{OP} { printf("%d: Token OP: %s\n", yylineno, yytext); }
98-
{CP} { printf("%d: Token CP: %s\n", yylineno, yytext); }
99-
{COMMA} { printf("%d: Token COMMA: %s\n", yylineno, yytext); }
100-
{OB} { printf("%d: Token OB: %s\n", yylineno, yytext); }
101-
{CB} { printf("%d: Token CB: %s\n", yylineno, yytext); }
102-
{OCB} { printf("%d: Token OCB: %s\n", yylineno, yytext); }
103-
{CCB} { printf("%d: Token CCB: %s\n", yylineno, yytext); }
104-
{ASSIGN} { printf("%d: Token ASSIGN: %s\n", yylineno, yytext); }
105-
{COLON} { printf("%d: Token COLON: %s\n", yylineno, yytext); }
106-
{ARROW_OP} { printf("%d: Token: ARROW_OP: %s\n", yylineno, yytext); }
107-
108-
"--".* { printf("%d: Single line comment detected\n", yylineno); }
109-
110-
"(*" { printf("%d: Multiline comment starts\n", yylineno); BEGIN(ML_COMMENT); }
87+
{INT_TYPE} { printf("%d: Token: INT_TYPE: %s\n", yylineno, yytext); return INT_TYPE; }
88+
{REAL_TYPE} { printf("%d: Token: REAL_TYPE %s\n", yylineno, yytext); return REAL_TYPE; }
89+
{BOOL_TYPE} { printf("%d: Token: BOOL_TYPE: %s\n", yylineno, yytext); return BOOL_TYPE; }
90+
{STRING_TYPE} { printf("%d: Token: STRING_TYPE: %s\n", yylineno, yytext); return STRING_TYPE; }
91+
{TRUE_KW} { printf("%d: Token: TRUE_KW: %s\n", yylineno, yytext); return TRUE_KW; }
92+
{FALSE_KW} { printf("%d: Token: FALSE_KW: %s\n", yylineno, yytext); return FALSE_KW; }
93+
{IF_KW} { printf("%d: Token: IF_KW: %s\n", yylineno, yytext); return IF_KW; }
94+
{THEN_KW} { printf("%d: Token: THEN_KW: %s\n", yylineno, yytext); return THEN_KW; }
95+
{ELSE_KW} { printf("%d: Token: ELSE_KW: %s\n", yylineno, yytext); return ELSE_KW; }
96+
{FI_KW} { printf("%d: Token: FI_KW: %s\n", yylineno, yytext); return FI_KW; }
97+
{WHILE_KW} { printf("%d: Token: WHILE_KW: %s\n", yylineno, yytext); return WHILE_KW; }
98+
{LOOP_KW} { printf("%d: Token: LOOP_KW: %s\n", yylineno, yytext); return LOOP_KW; }
99+
{POOL_KW} { printf("%d: Token: POOL_KW: %s\n", yylineno, yytext); return POOL_KW; }
100+
{CONST_KW} { printf("%d: Token: CONST_KW: %s\n", yylineno, yytext); return CONST_KW; }
101+
{LET_KW} { printf("%d: Token: LET_KW: %s\n", yylineno, yytext); return LET_KW; }
102+
{RETURN_KW} { printf("%d: Token: RETURN_KW: %s\n", yylineno, yytext); return RETURN_KW; }
103+
{START_KW} { printf("%d: Token: START_KW: %s\n", yylineno, yytext); return START_KW; }
104+
105+
{CONST_INTEGER_NUM} {
106+
printf("%d: Token CONST_INTEGER_NUM: %s\n", yylineno, yytext);
107+
return CONST_INTEGER_NUM;
108+
}
109+
110+
{REAL_NUM} {
111+
printf("%d: Token REAL_NUM: %s\n", yylineno, yytext);
112+
return REAL_NUM;
113+
}
114+
115+
{STRING} {
116+
printf("%d: Token: STRING: %s\n", yylineno, yytext);
117+
return STRING;
118+
}
119+
120+
{EQ_OP} { printf("%d: Token EQ_OP: %s\n", yylineno, yytext); return EQ_OP; }
121+
{LT_OP} { printf("%d: Token LT_OP: %s\n", yylineno, yytext); return LT_OP; }
122+
{LTEQ_OP} { printf("%d: Token LTEQ_OP: %s\n", yylineno, yytext); return LTEQ_OP; }
123+
{NEQ_OP} { printf("%d: Token NEQ_OP: %s\n", yylineno, yytext); return NEQ_OP; }
124+
{AND_OP} { printf("%d: Token AND_OP: %s\n", yylineno, yytext); return AND_OP; }
125+
{OR_OP} { printf("%d: Token OR_OP: %s\n", yylineno, yytext); return OR_OP; }
126+
{NOT_OP} { printf("%d: Token NOT_OP: %s\n", yylineno, yytext); return NOT_OP; }
127+
{PLUS_OP} { printf("%d: Token PLUS_OP: %s\n", yylineno, yytext); return PLUS_OP; }
128+
{MINUS_OP} { printf("%d: Token MINUS_OP: %s\n", yylineno, yytext); return MINUS_OP; }
129+
{MUL_OP} { printf("%d: Token MUL_OP: %s\n", yylineno, yytext); return MUL_OP; }
130+
{DIV_OP} { printf("%d: Token DIV_OP: %s\n", yylineno, yytext); return DIV_OP; }
131+
{MOD_OP} { printf("%d: Token MOD_OP: %s\n", yylineno, yytext); return MOD_OP; }
132+
{ASSIGN_OP} { printf("%d: Token ASSIGN_OP: %s\n", yylineno, yytext); return ASSIGN_OP; }
133+
134+
{SEMI} { printf("%d: Token SEMI: %s\n", yylineno, yytext); return SEMI;}
135+
{LEFT_PAR} { printf("%d: Token LEFT_PAR: %s\n", yylineno, yytext); return LEFT_PAR; }
136+
{RIGHT_PAR} { printf("%d: Token RIGHT_PAR: %s\n", yylineno, yytext); return RIGHT_PAR; }
137+
{COMMA} { printf("%d: Token COMMA: %s\n", yylineno, yytext); return COMMA; }
138+
{LEFT_BRACKET} { printf("%d: Token LEFT_BRACKET: %s\n", yylineno, yytext); return LEFT_BRACKET; }
139+
{RIGHT_BRACKET} { printf("%d: Token RIGHT_BRACKET: %s\n", yylineno, yytext); return RIGHT_BRACKET; }
140+
{LEFT_CURLY_BRACKET} { printf("%d: Token LEFT_CURLY_BRACKET: %s\n", yylineno, yytext); return LEFT_CURLY_BRACKET; }
141+
{RIGHT_CURLY_BRACKET} { printf("%d: Token RIGHT_CURLY_BRACKET: %s\n", yylineno, yytext); return RIGHT_CURLY_BRACKET; }
142+
{ARROW_DELIMITER} { printf("%d: Token: ARROW_DELIMITER %s\n", yylineno, yytext); return ARROW_DELIMITER; }
143+
{COLON} { printf("%d: Token COLON: %s\n", yylineno, yytext); return COLON; }
144+
145+
{ID} {
146+
printf("%d: Token: IDENTIFIER: %s\n", yylineno, yytext);
147+
return ID;
148+
}
149+
150+
{SL_COMMENT} { printf("%d: Single line comment detected\n", yylineno); }
151+
152+
{ML_COMMENT} { printf("%d: Multiline comment starts\n", yylineno); BEGIN(ML_COMMENT); }
111153
<ML_COMMENT>[^*\n]+ /* Ignore whatever is inside the comment */
<ML_COMMENT>"*" /* A lone star is ordinary comment text; the two-character terminator rule still wins by longest match */
112154
<ML_COMMENT>"*)" { /* Comment terminator: report it and resume normal scanning */ printf("%d: Multiline comment ends\n", yylineno); BEGIN(INITIAL); }
113155
<ML_COMMENT>"\n" /* Updates yylineno automatically */
114156

115-
\n /* Updates yylineno automatically */
157+
{NEW_LINE} /* Consume a single newline with no token. NOTE(review): yylineno is only advanced by flex when %option yylineno is enabled, which is not visible in this hunk - confirm. */
116158

117159
{WHITESPACE} /* Ignore whitespace */
118160

119161
. { yyerror("Unrecognized token "); }
120162

121163
%%
122-
123-
int main(int argc, char *argv[]) {
124-
if (argc > 1) {
125-
if (!(yyin = fopen(argv[1], "r"))) {
126-
perror(argv[1]);
127-
return 1;
128-
}
129-
}
130-
yylineno = 1;
131-
yylex();
132-
return 0;
133-
}
134-
135-
int yyerror(const char *msg) {
136-
printf("%d: %s: '%s'\n", yylineno, msg, yytext);
137-
}

0 commit comments

Comments
 (0)