From 792debd4e3d377bca7b6a957f100215a4ab0a7ab Mon Sep 17 00:00:00 2001 From: Emiliano Nunez Date: Sun, 11 Feb 2018 00:10:46 -0300 Subject: [PATCH] Vars Declaration --- chapter10/grammar.g | 69 ++++ chapter10/interpreter.hpp | 137 ++++++++ chapter10/lexer.hpp | 192 +++++++++++ chapter10/main.cpp | 45 +++ chapter10/nose.py | 634 +++++++++++++++++++++++++++++++++++ chapter10/parser.hpp | 399 ++++++++++++++++++++++ chapter10/printer.hpp | 144 ++++++++ chapter10/test/test_fail.pas | 6 + chapter10/test/test_ok.pas | 10 + 9 files changed, 1636 insertions(+) create mode 100644 chapter10/grammar.g create mode 100644 chapter10/interpreter.hpp create mode 100644 chapter10/lexer.hpp create mode 100644 chapter10/main.cpp create mode 100644 chapter10/nose.py create mode 100644 chapter10/parser.hpp create mode 100644 chapter10/printer.hpp create mode 100644 chapter10/test/test_fail.pas create mode 100644 chapter10/test/test_ok.pas diff --git a/chapter10/grammar.g b/chapter10/grammar.g new file mode 100644 index 0000000..a37c781 --- /dev/null +++ b/chapter10/grammar.g @@ -0,0 +1,69 @@ +Grammar lsbasi; + +expr + : term (('-' | '+') term ) * + ; + +term + : factor (('*' | '/' | 'DIV' ) factor ) * + ; + +factor + : '+' factor + | '-' factor + | NUM + | ID + | '(' expr ')' + ; + +assignment_statement + : ID ':=' expr + ; + +empty: + ; + +statement + : assignment_statement + | compound_statement + | empty + ; + +program + : 'PROGRAM' ID ';' block '.' + ; + +block + : declarations compound_statement + ; + +declarations + : 'VAR' (variable_declaration ';')+ + | empty + ; + +variable_declaration + : ID (',' ID)* ':' TYPE + ; + +compound_statement + : 'BEGIN' statement_list 'END' + ; + +statement_list + : statement + | statement ';' statement_list + ; + +ID + : "[a-zA-Z][a-zA-Z0-9]*" + ; + +NUM + : "[0-9]+(.[0-9]+)?" 
+ ; + +TYPE + : "INTEGER" + | "REAL" + ; diff --git a/chapter10/interpreter.hpp b/chapter10/interpreter.hpp new file mode 100644 index 0000000..68ca960 --- /dev/null +++ b/chapter10/interpreter.hpp @@ -0,0 +1,137 @@ +#include +#include +#include +#include +#include +#include + +namespace lsbasi { + + class Interpreter: public VisitorInterpreter { + + AST *ast; + std::map symbols; + float dispatcher(AST *node){ + return node->handler(this); + } + + float visit(Assign * node){ + std::string var = (static_cast(node->id))->value; + float value = dispatcher(node->expr); + symbols[var] = value; + return value; + } + + float visit(Id * node){ + std::stringstream error; + std::string var = node->value; + if (symbols.count(var)) + return symbols[var]; + else{ + error << "Undefined variable => " << var; + throw std::runtime_error(error.str()); + } + } + + float visit(Num *node){ + return node->value; + } + + float visit(BinOp *node){ + float ret = 0; + + switch(node->token->type){ + case Token::_PLUS: + ret = dispatcher(node->left) + dispatcher(node->right); + break; + case Token::_MINUS: + ret = dispatcher(node->left) - dispatcher(node->right); + break; + case Token::_MUL: + ret = dispatcher(node->left) * dispatcher(node->right); + break; + case Token::_INT_DIV: + ret = int(dispatcher(node->left) / dispatcher(node->right)); + break; + case Token::_REAL_DIV: + ret = dispatcher(node->left) / dispatcher(node->right); + break; + } + return ret; + } + + float visit(UnaryOp *node){ + float ret = 0; + switch(node->token->type){ + case Token::_PLUS: + ret = (+1) * dispatcher(node->fact); + break; + case Token::_MINUS: + ret = (-1) * dispatcher(node->fact); + break; + } + return ret; + } + + float visit(Empty *node){ + return 0; + } + + float visit(StatementList *node){ + float ret = 0; + std::vector::iterator begin = node->statements.begin(); + std::vector::iterator end = node->statements.end(); + for (std::vector::iterator it = begin; it != end; ++it) + ret = dispatcher(*it); + 
return ret; + } + + float visit(Program *node){ + float ret = 0; + //dispatcher(node->id); + dispatcher(node->bloque); + return ret; + } + + float visit(Block *node){ + float ret = 0; + dispatcher(node->declarations); + dispatcher(node->compound_statement); + return ret; + } + + float visit(DecList *node){ + float ret = 0; + return ret; + } + + float visit(Type *node){ + float ret = 0; + return ret; + } + + float visit(VarDecl *node){ + float ret = 0; + return ret; + } + + public: + + std::string environment(){ + std::stringstream ss; + std::string sep = ""; + ss << "{"; + for (std::map::iterator it=symbols.begin(); it!=symbols.end(); ++it){ + ss << sep << it->first << " => " << it->second; + sep = ","; + } + ss << "}"; + return ss.str(); + } + + Interpreter(AST * ast): ast(ast){} + float interpret (){ + return dispatcher(ast); + } + }; +}; \ No newline at end of file diff --git a/chapter10/lexer.hpp b/chapter10/lexer.hpp new file mode 100644 index 0000000..f6bdeb7 --- /dev/null +++ b/chapter10/lexer.hpp @@ -0,0 +1,192 @@ +#include +#include +#include +#include +#include + +namespace lsbasi { + + struct Token { + + enum TokenType { + _INTEGER, + _REAL, + _PLUS, + _MINUS, + _MUL, + _INT_DIV, + _REAL_DIV, + _RPAREN, + _LPAREN, + _ID, + _EOF, + _ASSIGN, + _SEMI, + _BEGIN, + _END, + _DOT, + _TYPE, + _VAR, + _PROGRAM, + _COMMA, + _COLON, + + }; + + TokenType type; + std::string value; + Token(TokenType type, std::string value): type(type), value(value){} + }; + + class Lexer { + + std::string text; + int pos; + char current_char; + + int error () { + throw std::runtime_error("Invalid character"); + } + + void advance () { + pos++; + if (pos > text.size() - 1) + current_char = 0; + else + current_char = text[pos]; + } + + char peek () { + int peek_pos = pos+1; + if (pos + 1 > text.size() - 1) + return 0; + return text[pos + 1]; + } + + void skip_whitespace () { + while((current_char != 0) && (current_char == ' ')) + advance(); + } + + void skip_comment () { + 
while((current_char != 0) && (current_char != '}')) + advance(); + advance(); + } + + Token * number () { + Token * token; + std::string result; + while((current_char != 0) && std::isdigit(current_char)){ + result += current_char; + advance(); + } + + if (current_char == '.'){ + result += current_char; + advance(); + while((current_char != 0) && std::isdigit(current_char)){ + result += current_char; + advance(); + } + token = new Token(Token::_REAL, result); + }else{ + token = new Token(Token::_INTEGER, result); + } + + return token; + } + + std::string id () { + std::string result; + while((current_char != 0) && (std::isalpha(current_char) || std::isdigit(current_char))){ + result += current_char; + advance(); + } + return result; + } + + public: + + Token * get_next_token () { + while(current_char != 0){ + skip_whitespace(); + if (std::isalpha(current_char)){ + std::string word = id(); + if (word == "BEGIN") + return new Token (Token::_BEGIN, word); + else if (word == "END") + return new Token (Token::_END, word); + else if ((word == "INTEGER") || (word == "REAL")) + return new Token (Token::_TYPE, word); + else if (word == "VAR") + return new Token (Token::_VAR, word); + else if (word == "DIV") + return new Token (Token::_INT_DIV, word); + else if (word == "PROGRAM") + return new Token (Token::_PROGRAM, word); + else + return new Token (Token::_ID, word); + } + if (current_char == '{'){ + advance(); + skip_comment(); + continue; + } + if (std::isdigit(current_char)) + return number(); + if ((current_char == ':') && peek() == '='){ + advance(); + advance(); + return new Token (Token::_ASSIGN, ":="); + } + if (current_char == '+'){ + advance(); + return new Token (Token::_PLUS, "+"); + } + if (current_char == '-'){ + advance(); + return new Token (Token::_MINUS, "-"); + } + if (current_char == '*'){ + advance(); + return new Token (Token::_MUL, "*"); + } + if (current_char == '/'){ + advance(); + return new Token (Token::_REAL_DIV, "/"); + } + if (current_char == 
'('){ + advance(); + return new Token (Token::_LPAREN, "("); + } + if (current_char == ')'){ + advance(); + return new Token (Token::_RPAREN, ")"); + } + if (current_char == ';'){ + advance(); + return new Token (Token::_SEMI, ";"); + } + if (current_char == '.'){ + advance(); + return new Token (Token::_DOT, "."); + } + if (current_char == ':'){ + advance(); + return new Token (Token::_COLON, ":"); + } + if (current_char == ','){ + advance(); + return new Token (Token::_COMMA, ","); + } + error(); + } + return new Token(Token::_EOF, ""); + } + + Lexer(std::string text): text(text), pos(0), current_char(text[pos]) {} + + }; + +}; \ No newline at end of file diff --git a/chapter10/main.cpp b/chapter10/main.cpp new file mode 100644 index 0000000..98874b0 --- /dev/null +++ b/chapter10/main.cpp @@ -0,0 +1,45 @@ +#include +#include +#include "lexer.hpp" +#include "parser.hpp" +#include "printer.hpp" +#include "interpreter.hpp" + +int main (){ + + //while(true){ + + try{ + + /** Read Input File **/ + + std::stringstream text; + for (std::string line; std::getline(std::cin, line);) { + text << " " << line; + } + + /** Apply Lexer and Parser, Return AST **/ + + lsbasi::Lexer lexer(text.str()); + lsbasi::Parser parser(lexer); + lsbasi::AST *ast = parser.parse(); + + /** Print AST in three different formats **/ + + lsbasi::Printer printer(ast); + std::cout << printer.print() << std::endl; + + /** Intepret AST **/ + + lsbasi::Interpreter interpreter(ast); + interpreter.interpret(); + std::cout << interpreter.environment() << std::endl; + + } catch(std::exception &e){ + std::cout << e.what() << std::endl; + } + + //} + + return 0; +} \ No newline at end of file diff --git a/chapter10/nose.py b/chapter10/nose.py new file mode 100644 index 0000000..d1ccd67 --- /dev/null +++ b/chapter10/nose.py @@ -0,0 +1,634 @@ +""" SPI - Simple Pascal Interpreter. 
Part 10.""" + +############################################################################### +# # +# LEXER # +# # +############################################################################### + +# Token types +# +# EOF (end-of-file) token is used to indicate that +# there is no more input left for lexical analysis +INTEGER = 'INTEGER' +REAL = 'REAL' +INTEGER_CONST = 'INTEGER_CONST' +REAL_CONST = 'REAL_CONST' +PLUS = 'PLUS' +MINUS = 'MINUS' +MUL = 'MUL' +INTEGER_DIV = 'INTEGER_DIV' +FLOAT_DIV = 'FLOAT_DIV' +LPAREN = 'LPAREN' +RPAREN = 'RPAREN' +ID = 'ID' +ASSIGN = 'ASSIGN' +BEGIN = 'BEGIN' +END = 'END' +SEMI = 'SEMI' +DOT = 'DOT' +PROGRAM = 'PROGRAM' +VAR = 'VAR' +COLON = 'COLON' +COMMA = 'COMMA' +EOF = 'EOF' + + +class Token(object): + def __init__(self, type, value): + self.type = type + self.value = value + + def __str__(self): + """String representation of the class instance. + Examples: + Token(INTEGER_CONST, 3) + Token(PLUS, '+') + Token(MUL, '*') + """ + return 'Token({type}, {value})'.format( + type=self.type, + value=repr(self.value) + ) + + def __repr__(self): + return self.__str__() + + +RESERVED_KEYWORDS = { + 'PROGRAM': Token('PROGRAM', 'PROGRAM'), + 'VAR': Token('VAR', 'VAR'), + 'DIV': Token('INTEGER_DIV', 'DIV'), + 'INTEGER': Token('INTEGER', 'INTEGER'), + 'REAL': Token('REAL', 'REAL'), + 'BEGIN': Token('BEGIN', 'BEGIN'), + 'END': Token('END', 'END'), +} + + +class Lexer(object): + def __init__(self, text): + # client string input, e.g. 
"4 + 2 * 3 - 6 / 2" + self.text = text + # self.pos is an index into self.text + self.pos = 0 + self.current_char = self.text[self.pos] + + def error(self): + raise Exception('Invalid character') + + def advance(self): + """Advance the `pos` pointer and set the `current_char` variable.""" + self.pos += 1 + if self.pos > len(self.text) - 1: + self.current_char = None # Indicates end of input + else: + self.current_char = self.text[self.pos] + + def peek(self): + peek_pos = self.pos + 1 + if peek_pos > len(self.text) - 1: + return None + else: + return self.text[peek_pos] + + def skip_whitespace(self): + while self.current_char is not None and self.current_char.isspace(): + self.advance() + + def skip_comment(self): + while self.current_char != '}': + self.advance() + self.advance() # the closing curly brace + + def number(self): + """Return a (multidigit) integer or float consumed from the input.""" + result = '' + while self.current_char is not None and self.current_char.isdigit(): + result += self.current_char + self.advance() + + if self.current_char == '.': + result += self.current_char + self.advance() + + while ( + self.current_char is not None and + self.current_char.isdigit() + ): + result += self.current_char + self.advance() + + token = Token('REAL_CONST', float(result)) + else: + token = Token('INTEGER_CONST', int(result)) + + return token + + def _id(self): + """Handle identifiers and reserved keywords""" + result = '' + while self.current_char is not None and self.current_char.isalnum(): + result += self.current_char + self.advance() + + token = RESERVED_KEYWORDS.get(result, Token(ID, result)) + return token + + def get_next_token(self): + """Lexical analyzer (also known as scanner or tokenizer) + This method is responsible for breaking a sentence + apart into tokens. One token at a time. 
+ """ + while self.current_char is not None: + + if self.current_char.isspace(): + self.skip_whitespace() + continue + + if self.current_char == '{': + self.advance() + self.skip_comment() + continue + + if self.current_char.isalpha(): + return self._id() + + if self.current_char.isdigit(): + return self.number() + + if self.current_char == ':' and self.peek() == '=': + self.advance() + self.advance() + return Token(ASSIGN, ':=') + + if self.current_char == ';': + self.advance() + return Token(SEMI, ';') + + if self.current_char == ':': + self.advance() + return Token(COLON, ':') + + if self.current_char == ',': + self.advance() + return Token(COMMA, ',') + + if self.current_char == '+': + self.advance() + return Token(PLUS, '+') + + if self.current_char == '-': + self.advance() + return Token(MINUS, '-') + + if self.current_char == '*': + self.advance() + return Token(MUL, '*') + + if self.current_char == '/': + self.advance() + return Token(FLOAT_DIV, '/') + + if self.current_char == '(': + self.advance() + return Token(LPAREN, '(') + + if self.current_char == ')': + self.advance() + return Token(RPAREN, ')') + + if self.current_char == '.': + self.advance() + return Token(DOT, '.') + + self.error() + + return Token(EOF, None) + + +############################################################################### +# # +# PARSER # +# # +############################################################################### + +class AST(object): + pass + + +class BinOp(AST): + def __init__(self, left, op, right): + self.left = left + self.token = self.op = op + self.right = right + + +class Num(AST): + def __init__(self, token): + self.token = token + self.value = token.value + + +class UnaryOp(AST): + def __init__(self, op, expr): + self.token = self.op = op + self.expr = expr + + +class Compound(AST): + """Represents a 'BEGIN ... 
END' block""" + def __init__(self): + self.children = [] + + +class Assign(AST): + def __init__(self, left, op, right): + self.left = left + self.token = self.op = op + self.right = right + + +class Var(AST): + """The Var node is constructed out of ID token.""" + def __init__(self, token): + self.token = token + self.value = token.value + + +class NoOp(AST): + pass + + +class Program(AST): + def __init__(self, name, block): + self.name = name + self.block = block + + +class Block(AST): + def __init__(self, declarations, compound_statement): + self.declarations = declarations + self.compound_statement = compound_statement + + +class VarDecl(AST): + def __init__(self, var_node, type_node): + self.var_node = var_node + self.type_node = type_node + + +class Type(AST): + def __init__(self, token): + self.token = token + self.value = token.value + + +class Parser(object): + def __init__(self, lexer): + self.lexer = lexer + # set current token to the first token taken from the input + self.current_token = self.lexer.get_next_token() + + def error(self): + raise Exception('Invalid syntax') + + def eat(self, token_type): + # compare the current token type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. 
+ if self.current_token.type == token_type: + self.current_token = self.lexer.get_next_token() + else: + self.error() + + def program(self): + """program : PROGRAM variable SEMI block DOT""" + self.eat(PROGRAM) + var_node = self.variable() + prog_name = var_node.value + self.eat(SEMI) + block_node = self.block() + program_node = Program(prog_name, block_node) + self.eat(DOT) + return program_node + + def block(self): + """block : declarations compound_statement""" + declaration_nodes = self.declarations() + compound_statement_node = self.compound_statement() + node = Block(declaration_nodes, compound_statement_node) + return node + + def declarations(self): + """declarations : VAR (variable_declaration SEMI)+ + | empty + """ + declarations = [] + if self.current_token.type == VAR: + self.eat(VAR) + while self.current_token.type == ID: + var_decl = self.variable_declaration() + declarations.extend(var_decl) + self.eat(SEMI) + + return declarations + + def variable_declaration(self): + """variable_declaration : ID (COMMA ID)* COLON type_spec""" + var_nodes = [Var(self.current_token)] # first ID + self.eat(ID) + + while self.current_token.type == COMMA: + self.eat(COMMA) + var_nodes.append(Var(self.current_token)) + self.eat(ID) + + self.eat(COLON) + + type_node = self.type_spec() + var_declarations = [ + VarDecl(var_node, type_node) + for var_node in var_nodes + ] + return var_declarations + + def type_spec(self): + """type_spec : INTEGER + | REAL + """ + token = self.current_token + if self.current_token.type == INTEGER: + self.eat(INTEGER) + else: + self.eat(REAL) + node = Type(token) + return node + + def compound_statement(self): + """ + compound_statement: BEGIN statement_list END + """ + self.eat(BEGIN) + nodes = self.statement_list() + self.eat(END) + + root = Compound() + for node in nodes: + root.children.append(node) + + return root + + def statement_list(self): + """ + statement_list : statement + | statement SEMI statement_list + """ + node = 
self.statement() + + results = [node] + + while self.current_token.type == SEMI: + self.eat(SEMI) + results.append(self.statement()) + + return results + + def statement(self): + """ + statement : compound_statement + | assignment_statement + | empty + """ + if self.current_token.type == BEGIN: + node = self.compound_statement() + elif self.current_token.type == ID: + node = self.assignment_statement() + else: + node = self.empty() + return node + + def assignment_statement(self): + """ + assignment_statement : variable ASSIGN expr + """ + left = self.variable() + token = self.current_token + self.eat(ASSIGN) + right = self.expr() + node = Assign(left, token, right) + return node + + def variable(self): + """ + variable : ID + """ + node = Var(self.current_token) + self.eat(ID) + return node + + def empty(self): + """An empty production""" + return NoOp() + + def expr(self): + """ + expr : term ((PLUS | MINUS) term)* + """ + node = self.term() + + while self.current_token.type in (PLUS, MINUS): + token = self.current_token + if token.type == PLUS: + self.eat(PLUS) + elif token.type == MINUS: + self.eat(MINUS) + + node = BinOp(left=node, op=token, right=self.term()) + + return node + + def term(self): + """term : factor ((MUL | INTEGER_DIV | FLOAT_DIV) factor)*""" + node = self.factor() + + while self.current_token.type in (MUL, INTEGER_DIV, FLOAT_DIV): + token = self.current_token + if token.type == MUL: + self.eat(MUL) + elif token.type == INTEGER_DIV: + self.eat(INTEGER_DIV) + elif token.type == FLOAT_DIV: + self.eat(FLOAT_DIV) + + node = BinOp(left=node, op=token, right=self.factor()) + + return node + + def factor(self): + """factor : PLUS factor + | MINUS factor + | INTEGER_CONST + | REAL_CONST + | LPAREN expr RPAREN + | variable + """ + token = self.current_token + if token.type == PLUS: + self.eat(PLUS) + node = UnaryOp(token, self.factor()) + return node + elif token.type == MINUS: + self.eat(MINUS) + node = UnaryOp(token, self.factor()) + return node + 
elif token.type == INTEGER_CONST: + self.eat(INTEGER_CONST) + return Num(token) + elif token.type == REAL_CONST: + self.eat(REAL_CONST) + return Num(token) + elif token.type == LPAREN: + self.eat(LPAREN) + node = self.expr() + self.eat(RPAREN) + return node + else: + node = self.variable() + return node + + def parse(self): + """ + program : PROGRAM variable SEMI block DOT + block : declarations compound_statement + declarations : VAR (variable_declaration SEMI)+ + | empty + variable_declaration : ID (COMMA ID)* COLON type_spec + type_spec : INTEGER + compound_statement : BEGIN statement_list END + statement_list : statement + | statement SEMI statement_list + statement : compound_statement + | assignment_statement + | empty + assignment_statement : variable ASSIGN expr + empty : + expr : term ((PLUS | MINUS) term)* + term : factor ((MUL | INTEGER_DIV | FLOAT_DIV) factor)* + factor : PLUS factor + | MINUS factor + | INTEGER_CONST + | REAL_CONST + | LPAREN expr RPAREN + | variable + variable: ID + """ + node = self.program() + if self.current_token.type != EOF: + self.error() + + return node + + +############################################################################### +# # +# INTERPRETER # +# # +############################################################################### + +class NodeVisitor(object): + def visit(self, node): + method_name = 'visit_' + type(node).__name__ + visitor = getattr(self, method_name, self.generic_visit) + return visitor(node) + + def generic_visit(self, node): + raise Exception('No visit_{} method'.format(type(node).__name__)) + + +class Interpreter(NodeVisitor): + def __init__(self, parser): + self.parser = parser + import collections + self.GLOBAL_SCOPE = collections.OrderedDict() + + def visit_Program(self, node): + self.visit(node.block) + + def visit_Block(self, node): + for declaration in node.declarations: + self.visit(declaration) + self.visit(node.compound_statement) + + def visit_VarDecl(self, node): + # Do nothing + 
pass + + def visit_Type(self, node): + # Do nothing + pass + + def visit_BinOp(self, node): + if node.op.type == PLUS: + return self.visit(node.left) + self.visit(node.right) + elif node.op.type == MINUS: + return self.visit(node.left) - self.visit(node.right) + elif node.op.type == MUL: + return self.visit(node.left) * self.visit(node.right) + elif node.op.type == INTEGER_DIV: + return self.visit(node.left) // self.visit(node.right) + elif node.op.type == FLOAT_DIV: + return float(self.visit(node.left)) / float(self.visit(node.right)) + + def visit_Num(self, node): + return node.value + + def visit_UnaryOp(self, node): + op = node.op.type + if op == PLUS: + return +self.visit(node.expr) + elif op == MINUS: + return -self.visit(node.expr) + + def visit_Compound(self, node): + for child in node.children: + self.visit(child) + + def visit_Assign(self, node): + var_name = node.left.value + self.GLOBAL_SCOPE[var_name] = self.visit(node.right) + + def visit_Var(self, node): + var_name = node.value + var_value = self.GLOBAL_SCOPE.get(var_name) + if var_value is None: + raise NameError(repr(var_name)) + else: + return var_value + + def visit_NoOp(self, node): + pass + + def interpret(self): + tree = self.parser.parse() + if tree is None: + return '' + return self.visit(tree) + + +def main(): + import sys + text = open(sys.argv[1], 'r').read() + + lexer = Lexer(text) + parser = Parser(lexer) + interpreter = Interpreter(parser) + result = interpreter.interpret() + + for k, v in sorted(interpreter.GLOBAL_SCOPE.items()): + print('%s = %s' % (k, v)) + + +if __name__ == '__main__': + main() diff --git a/chapter10/parser.hpp b/chapter10/parser.hpp new file mode 100644 index 0000000..4fd6bd5 --- /dev/null +++ b/chapter10/parser.hpp @@ -0,0 +1,399 @@ +#include +#include +#include +#include +#include +#include + +namespace lsbasi { + + struct VisitorPrint { + virtual std::stringstream visit(class Num *ast, int deep) = 0; + virtual std::stringstream visit(class BinOp *ast, int deep) 
= 0; + virtual std::stringstream visit(class UnaryOp *ast, int deep) = 0; + virtual std::stringstream visit(class Id *ast, int deep) = 0; + virtual std::stringstream visit(class Assign *ast, int deep) = 0; + virtual std::stringstream visit(class Empty *ast, int deep) = 0; + virtual std::stringstream visit(class StatementList *ast, int deep) = 0; + virtual std::stringstream visit(class Program *ast, int deep) = 0; + virtual std::stringstream visit(class Block *ast, int deep) = 0; + virtual std::stringstream visit(class Type *ast, int deep) = 0; + virtual std::stringstream visit(class VarDecl *ast, int deep) = 0; + virtual std::stringstream visit(class DecList *ast, int deep) = 0; + + }; + + struct VisitorInterpreter { + virtual float visit(class Num *ast) = 0; + virtual float visit(class BinOp *ast) = 0; + virtual float visit(class UnaryOp *ast) = 0; + virtual float visit(class Id *ast) = 0; + virtual float visit(class Assign *ast) = 0; + virtual float visit(class Empty *ast) = 0; + virtual float visit(class StatementList *ast) = 0; + virtual float visit(class Program *ast) = 0; + virtual float visit(class Block *ast) = 0; + virtual float visit(class Type *ast) = 0; + virtual float visit(class VarDecl *ast) = 0; + virtual float visit(class DecList *ast) = 0; + }; + + struct AST { + virtual std::stringstream handler(VisitorPrint * v, int deep) = 0; + virtual float handler(VisitorInterpreter * v) = 0; + }; + + struct Num: public AST { + Token * token; + float value; + Num(Token * token):token(token){ + value = std::stoi(token->value); + } + std::stringstream handler(VisitorPrint * v, int deep){ + return v->visit(this, deep); + } + float handler(VisitorInterpreter * v){ + return v->visit(this); + } + }; + + struct Id: public AST { + Token * token; + std::string value; + Id(Token * token):token(token),value(token->value){} + std::stringstream handler(VisitorPrint * v, int deep){ + return v->visit(this, deep); + } + float handler(VisitorInterpreter * v){ + return 
v->visit(this); + } + }; + + struct BinOp: public AST { + AST * left; + Token * token; + std::string op; + AST * right; + BinOp(Token * token, AST * left, AST * right): token(token), left(left), right(right), op(token->value){} + std::stringstream handler(VisitorPrint * v, int deep){ + return v->visit(this, deep); + } + float handler(VisitorInterpreter * v){ + return v->visit(this); + } + }; + + struct UnaryOp: public AST { + AST * fact; + Token * token; + std::string op; + UnaryOp(Token * token, AST * fact): token(token), fact(fact), op(token->value){} + std::stringstream handler(VisitorPrint * v, int deep){ + return v->visit(this, deep); + } + float handler(VisitorInterpreter * v){ + return v->visit(this); + } + }; + + struct Assign: public AST { + AST * id; + AST * expr; + Assign(AST * id, AST * expr): id(id), expr(expr){} + std::stringstream handler(VisitorPrint * v, int deep){ + return v->visit(this, deep); + } + float handler(VisitorInterpreter * v){ + return v->visit(this); + } + }; + + struct Empty: public AST { + Empty(){}; + std::stringstream handler(VisitorPrint * v, int deep){ + return v->visit(this, deep); + } + float handler(VisitorInterpreter * v){ + return v->visit(this); + } + }; + + struct StatementList: public AST { + std::vector statements; + StatementList(std::vector statements): statements(statements){}; + std::stringstream handler(VisitorPrint * v, int deep){ + return v->visit(this, deep); + } + float handler(VisitorInterpreter * v){ + return v->visit(this); + } + }; + + struct Program: public AST { + AST * id; + AST * bloque; + Program(AST * id, AST * bloque): id(id), bloque(bloque){} + std::stringstream handler(VisitorPrint * v, int deep){ + return v->visit(this, deep); + } + float handler(VisitorInterpreter * v){ + return v->visit(this); + } + }; + + struct Block: public AST { + AST * declarations; + AST * compound_statement; + Block(AST * declarations, AST * compound_statement): + declarations(declarations), + 
compound_statement(compound_statement){}
+        std::stringstream handler(VisitorPrint * v, int deep){
+            return v->visit(this, deep);
+        }
+        float handler(VisitorInterpreter * v){
+            return v->visit(this);
+        }
+    };
+
+    // AST node for a VAR section: its variable declarations in source order.
+    struct DecList: public AST {
+        std::vector<AST *> declarations;
+        DecList(std::vector<AST *> declarations): declarations(declarations){}
+        // Double dispatch into the printing visitor; `deep` is the indent width.
+        std::stringstream handler(VisitorPrint * v, int deep){
+            return v->visit(this, deep);
+        }
+        // Double dispatch into the interpreting visitor.
+        float handler(VisitorInterpreter * v){
+            return v->visit(this);
+        }
+    };
+
+    // AST node for one declaration `id (, id)* : type`.
+    struct VarDecl: public AST {
+        std::vector<AST *> ids;  // one Id node per declared name
+        AST * type;              // Type node shared by all the names
+        VarDecl(std::vector<AST *> ids, AST * type): ids(ids), type(type){}
+        // Double dispatch into the printing visitor; `deep` is the indent width.
+        std::stringstream handler(VisitorPrint * v, int deep){
+            return v->visit(this, deep);
+        }
+        // Double dispatch into the interpreting visitor.
+        float handler(VisitorInterpreter * v){
+            return v->visit(this);
+        }
+    };
+
+    // AST node for a type name; `value` is a copy of token->value.
+    struct Type: public AST {
+        Token * token;
+        std::string value;
+        Type(Token * token): token(token), value(token->value){}
+        // Double dispatch into the printing visitor; `deep` is the indent width.
+        std::stringstream handler(VisitorPrint * v, int deep){
+            return v->visit(this, deep);
+        }
+        // Double dispatch into the interpreting visitor.
+        float handler(VisitorInterpreter * v){
+            return v->visit(this);
+        }
+    };
+
+    // Recursive-descent parser: one private method per grammar rule, each
+    // returning the AST for the tokens it consumed. Syntax errors are
+    // reported by throwing std::runtime_error.
+    class Parser {
+
+        Token * current_token;  // one-token lookahead
+        Lexer lexer;
+
+        // Always throws std::runtime_error(description); never returns.
+        [[noreturn]] void error (const std::string & description) {
+            throw std::runtime_error(description);
+        }
+
+        // Consumes the lookahead if it has the expected type and fetches the
+        // next token; otherwise fails with "Error 0 => <token value>".
+        void eat (Token::TokenType type){
+            if (current_token->type != type)
+                error("Error 0 => " + current_token->value);
+            current_token = lexer.get_next_token();
+        }
+
+        // Consumes an ID token and wraps it in an Id node. The token type is
+        // checked before the node is allocated, so a syntax error does not
+        // build a node from a non-identifier token.
+        AST * identifier(){
+            Token * tk = current_token;
+            eat(Token::_ID);
+            return new Id(tk);
+        }
+
+        // factor : '+' factor | '-' factor | NUM | ID | '(' expr ')'
+        AST * factor(){
+            Token * tk = current_token;
+
+            switch(tk->type){
+                case Token::_INTEGER:
+                case Token::_REAL: {
+                    AST * num = new Num(tk);
+                    eat(tk->type);
+                    return num;
+                }
+                case Token::_LPAREN: {
+                    eat(Token::_LPAREN);
+                    AST * inner = expr();
+                    eat(Token::_RPAREN);
+                    return inner;
+                }
+                case Token::_PLUS:
+                case Token::_MINUS:
+                    eat(tk->type);
+                    return new UnaryOp(tk, factor());
+                default:
+                    // Anything else has to be an identifier.
+                    return identifier();
+            }
+        }
+
+        // term : factor (('*' | '/' | 'DIV') factor)*   (left-associative)
+        AST * term(){
+            AST * ast = factor();
+
+            while((current_token->type == Token::_MUL)
+                || (current_token->type == Token::_INT_DIV)
+                || (current_token->type == Token::_REAL_DIV)){
+                Token * tk = current_token;
+                eat(tk->type);
+                ast = new BinOp(tk, ast, factor());
+            }
+
+            return ast;
+        }
+
+        // expr : term (('+' | '-') term)*   (left-associative)
+        AST * expr(){
+            AST * ast = term();
+
+            while((current_token->type == Token::_PLUS)
+                || (current_token->type == Token::_MINUS)){
+                Token * tk = current_token;
+                eat(tk->type);
+                ast = new BinOp(tk, ast, term());
+            }
+
+            return ast;
+        }
+
+        // assignment_statement : ID ':=' expr
+        AST * assignment_statement(){
+            AST * var = identifier();
+            eat(Token::_ASSIGN);
+            return new Assign(var, expr());
+        }
+
+        // empty :
+        AST * empty(){
+            return new Empty();
+        }
+
+        // statement : assignment_statement | compound_statement | empty
+        // The lookahead token alone selects the alternative.
+        AST * statement(){
+            switch(current_token->type){
+                case Token::_ID:
+                    return assignment_statement();
+                case Token::_BEGIN:
+                    return compound_statement();
+                default:
+                    return empty();
+            }
+        }
+
+        // statement_list : statement (';' statement)*
+        // Returns a StatementList holding the statements in source order.
+        AST * statement_list(){
+            std::vector<AST *> statements;
+            statements.push_back(statement());
+            while(current_token->type == Token::_SEMI){
+                eat(Token::_SEMI);
+                statements.push_back(statement());
+            }
+            return new StatementList(statements);
+        }
+
+        // compound_statement : 'BEGIN' statement_list 'END'
+        AST * compound_statement(){
+            eat(Token::_BEGIN);
+            AST * ast = statement_list();
+            eat(Token::_END);
+            return ast;
+        }
+
+        // variable_declaration : ID (',' ID)* ':' TYPE
+        AST * variable_declaration(){
+            std::vector<AST *> variables;
+            variables.push_back(identifier());
+            while(current_token->type == Token::_COMMA){
+                eat(Token::_COMMA);
+                variables.push_back(identifier());
+            }
+            eat(Token::_COLON);
+            Token * tk = current_token;
+            eat(Token::_TYPE);
+            return new VarDecl(variables, new Type(tk));
+        }
+
+        // declarations : 'VAR' (variable_declaration ';')+ | empty
+        // Returns a DecList, or an Empty node when there is no VAR section.
+        AST * declarations(){
+            if(current_token->type != Token::_VAR)
+                return empty();
+
+            eat(Token::_VAR);
+            std::vector<AST *> decs;
+            do {
+                decs.push_back(variable_declaration());
+                eat(Token::_SEMI);
+            } while (current_token->type == Token::_ID);
+            return new DecList(decs);
+        }
+
+        // block : declarations compound_statement
+        AST * block(){
+            // Parse into named locals: the order in which function arguments
+            // are evaluated is unspecified, and the declarations have to be
+            // consumed from the token stream before the compound statement.
+            AST * decls = declarations();
+            AST * body = compound_statement();
+            return new Block(decls, body);
+        }
+
+        // program : 'PROGRAM' ID ';' block '.'
+        AST * program(){
+            eat(Token::_PROGRAM);
+            AST * var = identifier();
+            eat(Token::_SEMI);
+            AST * blk = block();
+            eat(Token::_DOT);
+            return new Program(var, blk);
+        }
+
+        public:
+
+        // Copies `lexer` and primes the lookahead with its first token.
+        Parser(lsbasi::Lexer& lexer): lexer(lexer) {
+            current_token = this->lexer.get_next_token();
+        }
+
+        // Parses a whole program and returns the root of its AST. Throws
+        // std::runtime_error("Error 2") if tokens remain after the final '.'.
+        AST * parse(){
+            AST * ast = program();
+            if (current_token->type != Token::_EOF)
+                error("Error 2");
+            return ast;
+        }
+
+    };
+}
diff --git a/chapter10/printer.hpp b/chapter10/printer.hpp
new file mode 100644
index 0000000..25fbfee
--- /dev/null
+++ b/chapter10/printer.hpp
@@ -0,0 +1,144 @@
+#include
+#include
+#include
+#include
+#include
+
+namespace lsbasi {
+
+    // Visitor that renders an AST as an indented outline: one node per line,
+    // with the children of a node shifted two columns to the right of it.
+    // `deep` is the column at which the visited node starts.
+    class Printer: public VisitorPrint {
+
+        private:
+
+        // Root of the tree rendered by print().
+        AST *ast;
+
+        // Routes a node to the visit() overload matching its concrete type.
+        std::stringstream dispatcher(AST *node, int deep){
+            return node->handler(this, deep);
+        }
+
+        // Renders a binary operation labelled `op`, then its left and right
+        // operands one level deeper.
+        std::stringstream print_op(BinOp *node, std::string op, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "BinOp(" << op << ")\n";
+            ss << dispatcher(node->left, deep + 2).str();
+            ss << dispatcher(node->right, deep + 2).str();
+            return ss;
+        }
+
+        // Leaf: prints the numeric value.
+        std::stringstream visit(Num *node, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "Num(" << node->value << ")\n";
+            return ss;
+        }
+
+        // Leaf: prints the identifier's name.
+        std::stringstream visit(Id *node, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "Id(" << node->value << ")\n";
+            return ss;
+        }
+
+        // Prints the assignment header, then the target identifier and the
+        // assigned expression as children.
+        std::stringstream visit(Assign *node, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "Assignment()\n";
+            ss << dispatcher(node->id, deep + 2).str();
+            ss << dispatcher(node->expr, deep + 2).str();
+            return ss;
+        }
+
+        // Delegates to print_op() using the node's own `op` label.
+        std::stringstream visit(BinOp *node, int deep){
+            return print_op(node, node->op, deep);
+        }
+
+        // The operand is printed one level deeper than the operator, like the
+        // children of every other composite node.
+        std::stringstream visit(UnaryOp *node, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "UnaryOp(" << node->op << ")\n";
+            ss << dispatcher(node->fact, deep + 2).str();
+            return ss;
+        }
+
+        // An empty statement produces no output.
+        std::stringstream visit(Empty *node, int deep){
+            return std::stringstream();
+        }
+
+        // Prints the list header, then every statement in order as a child.
+        std::stringstream visit(StatementList *node, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "StatementList()\n";
+            for (AST *statement : node->statements)
+                ss << dispatcher(statement, deep + 2).str();
+            return ss;
+        }
+
+        // Prints the program header, then the program name and its block as
+        // children.
+        std::stringstream visit(Program *node, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "Program()\n";
+            ss << dispatcher(node->id, deep + 2).str();
+            ss << dispatcher(node->bloque, deep + 2).str();
+            return ss;
+        }
+
+        // Prints the block header, then its declarations and its compound
+        // statement as children.
+        std::stringstream visit(Block *node, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "Block()\n";
+            ss << dispatcher(node->declarations, deep + 2).str();
+            ss << dispatcher(node->compound_statement, deep + 2).str();
+            return ss;
+        }
+
+        // Leaf: prints the type name.
+        std::stringstream visit(Type *node, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "Type(" << node->value << ")\n";
+            return ss;
+        }
+
+        // Lists the declared identifiers first, then their shared type.
+        std::stringstream visit(VarDecl *node, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "VarDecl()\n";
+            for (AST *name : node->ids)
+                ss << dispatcher(name, deep + 2).str();
+            ss << dispatcher(node->type, deep + 2).str();
+            return ss;
+        }
+
+        // Prints the section header, then every declaration in order as a
+        // child.
+        std::stringstream visit(DecList *node, int deep){
+            std::stringstream ss;
+            ss << std::string(deep, ' ') << "DecList()\n";
+            for (AST *declaration : node->declarations)
+                ss << dispatcher(declaration, deep + 2).str();
+            return ss;
+        }
+
+        public:
+
+        // Binds the printer to the tree rooted at `ast`.
+        Printer(AST * ast): ast(ast){}
+
+        // Returns the rendered outline of the whole tree, with the root at
+        // column 0.
+        std::string print (){
+            return dispatcher(ast, 0).str();
+        }
+    };
+}
\ No newline at end of file
diff --git a/chapter10/test/test_fail.pas b/chapter10/test/test_fail.pas
new file mode 100644
index 0000000..fa59abe
--- /dev/null
+++ b/chapter10/test/test_fail.pas
@@ -0,0 +1,6 @@
+BEGIN
+a:=1;
+b:=2;
+c:=a+b;
+d:=f*2;
+END.
\ No newline at end of file
diff --git a/chapter10/test/test_ok.pas b/chapter10/test/test_ok.pas
new file mode 100644
index 0000000..7a803c8
--- /dev/null
+++ b/chapter10/test/test_ok.pas
@@ -0,0 +1,10 @@
+PROGRAM Part10AST;
+VAR
+   a, b : INTEGER;
+   y    : REAL;
+
+BEGIN {Part10AST}
+   a := 2;
+   b := 10 * a + 10 * a DIV 4;
+   y := 20 / 7 + 3.14;
+END. {Part10AST}
\ No newline at end of file