-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0cdefe0
commit b6d97bb
Showing
15 changed files
with
353 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
open D8.Lexer | ||
open D8.Parser | ||
open D8.Interpreter |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Discussion 7 - Friday, October 11th | ||
|
||
## Reminders | ||
|
||
1. Project 3 due **Monday, October 14 @ 11:59 PM** | ||
|
||
## Coding Exercise | ||
|
||
- To go from source code to a running program, there are 3 steps (at least for our purposes): | ||
|
||
- Tokenizing/Lexing (separating text into smaller tokens) | ||
- Parsing (generating something meaningful from the tokens - an AST) | ||
- Interpreting (evaluating the result of the AST) | ||
|
||
- Consider the following grammar: | ||
|
||
``` | ||
S -> M + S | M | ||
M -> N * M | N | ||
N -> n | (S) | ||
* where n is any integer | ||
``` | ||
|
||
- This grammar is right associative/recursive. Why did we provide a right associative grammar? What would you do if we didn't? | ||
|
||
- What is the relative precedence of the + and \* operators here? How is it determined? How can we use CFGs to enforce precedence? | ||
|
||
### Lexer | ||
|
||
- Open `lexer.ml`. | ||
- **NOTES:** | ||
- Take a look at the variant type `token` we have defined | ||
- Keep an index that keeps track of where we are in the string, and move forward as we keep tokenizing. | ||
- It's probably also a good idea to define all the regexes up front and store them in variables at the top. | ||
|
||
### Parser | ||
|
||
- Open `parser.ml`. | ||
- **NOTES:** | ||
- Take a look at the variant type `expr` we have defined | ||
- Use `let rec ... and` to write mutually recursive functions. | ||
- `lookahead` returns the head of the list. | ||
- `match_token` "consumes" the head of the list (provided that the expected token and the head of the list match). | ||
- **IMPORTANT:** | ||
- We're going to write a function named `parse_X` for each nonterminal `X` in our grammar. | ||
- Each of these functions will parse (consume) some tokens, and return (1) the unparsed tokens and (2) the AST which corresponds to the parsed tokens. | ||
|
||
### Interpreter | ||
|
||
- Open `interpreter.ml`. | ||
- **NOTES:** | ||
- Our `eval` function will take in an AST created by `parser` and evaluate it into an integer | ||
- Recursion is your friend! | ||
|
||
## Resources & Additional Readings | ||
|
||
- [Cliff's Notes on Grammars](https://bakalian.cs.umd.edu/assets/notes/grammars.pdf) | ||
- [Anwar's Parsing Slides](https://bakalian.cs.umd.edu/assets/slides/16-parsing1.pdf) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
(lang dune 2.3) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
open Parser | ||
|
||
(* Evaluator *)

(** [eval ast] reduces the expression tree [ast] to the integer it denotes.
    An [Int] leaf is its own value; [Plus] and [Mult] evaluate the left
    operand, then the right operand, and combine the two results. *)
let rec eval (ast : expr) : int =
  (* Shared shape of both binary nodes: left first, then right, then [op]. *)
  let apply op lhs rhs =
    let l = eval lhs in
    let r = eval rhs in
    op l r
  in
  match ast with
  | Int n -> n
  | Plus (lhs, rhs) -> apply ( + ) lhs rhs
  | Mult (lhs, rhs) -> apply ( * ) lhs rhs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
(* Type *)

(** Tokens of the arithmetic language: integer literals, [*], [+], the two
    parentheses, and an end-of-input marker. *)
type token =
  | Tok_Int of int
  | Tok_Mult
  | Tok_Plus
  | Tok_LParen
  | Tok_RParen
  | Tok_EOF

(** [string_of_token tok] is the source text of [tok]: the decimal digits of
    a [Tok_Int], the operator or parenthesis character, and [""] for
    [Tok_EOF]. *)
let string_of_token tok =
  match tok with
  | Tok_Int i -> string_of_int i
  | Tok_Mult -> "*"
  | Tok_Plus -> "+"
  | Tok_LParen -> "("
  | Tok_RParen -> ")"
  | Tok_EOF -> ""

(** [string_of_list conv lst] renders every element of [lst] with [conv] and
    joins the results with single spaces. The empty list gives [""]. *)
let string_of_list conv lst = String.concat " " (List.map conv lst)

(** [lexer input] splits the source text [input] into tokens, always ending
    the result with [Tok_EOF].

    Integer literals may carry a leading [-]. Runs of spaces, tabs and
    newlines between tokens are skipped.

    @raise Failure with message ["lexing error"] when the text at the current
    position is not a token or whitespace. [int_of_string] may also raise
    [Failure] when a literal does not fit in an [int]. *)
let lexer (input : string) : token list =
  let length = String.length input in
  (* Compile every pattern once per call instead of once per position. *)
  let re_lparen = Str.regexp "(" in
  let re_rparen = Str.regexp ")" in
  let re_plus = Str.regexp "\\+" in
  let re_mult = Str.regexp "\\*" in
  let re_int = Str.regexp "-?[0-9]+" in
  let re_space = Str.regexp "[ \t\n\r]+" in
  (* [Str.string_match] anchors the match at [pos]. *)
  let matches re pos = Str.string_match re input pos in
  let rec tok pos =
    if pos >= length then [Tok_EOF]
    else if matches re_lparen pos then Tok_LParen :: tok (pos + 1)
    else if matches re_rparen pos then Tok_RParen :: tok (pos + 1)
    else if matches re_plus pos then Tok_Plus :: tok (pos + 1)
    else if matches re_mult pos then Tok_Mult :: tok (pos + 1)
    else if matches re_int pos then
      (* Read the matched text before any further Str call overwrites it. *)
      let value = Str.matched_string input in
      Tok_Int (int_of_string value) :: tok (pos + String.length value)
    else if matches re_space pos then
      (* Skip the whole whitespace run in one step. *)
      tok (Str.match_end ())
    else failwith "lexing error"
  in
  tok 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
(* Type *)

(** Tokens of the arithmetic language: integer literals, [*], [+], the two
    parentheses, and an end-of-input marker. *)
type token =
  | Tok_Int of int
  | Tok_Mult
  | Tok_Plus
  | Tok_LParen
  | Tok_RParen
  | Tok_EOF

(** [string_of_token tok] is the source text of [tok]: the decimal digits of
    a [Tok_Int], the operator or parenthesis character, and [""] for
    [Tok_EOF]. *)
let string_of_token tok =
  match tok with
  | Tok_Int i -> string_of_int i
  | Tok_Mult -> "*"
  | Tok_Plus -> "+"
  | Tok_LParen -> "("
  | Tok_RParen -> ")"
  | Tok_EOF -> ""

(** [string_of_list conv lst] renders every element of [lst] with [conv] and
    joins the results with single spaces. The empty list gives [""]. *)
let string_of_list conv lst = String.concat " " (List.map conv lst)

(** [lexer input] splits the source text [input] into tokens, always ending
    the result with [Tok_EOF] so that the token stream has an explicit
    end-of-input marker.

    Integer literals may carry a leading [-]. Runs of whitespace between
    tokens are skipped.

    @raise Failure with message ["lexing error"] when the remaining text does
    not start with a token or whitespace. [int_of_string] may also raise
    [Failure] when a literal does not fit in an [int]. *)
let lexer (input : string) : token list =
  (* Backslashes are doubled so that the regex engine, not the OCaml string
     lexer, interprets the escapes. Patterns are compiled once per call. *)
  let numre = Re.compile (Re.Perl.re "^(-?[0-9]+)") in
  let addre = Re.compile (Re.Perl.re "^\\+") in
  let mulre = Re.compile (Re.Perl.re "^\\*") in
  let lpre = Re.compile (Re.Perl.re "^\\(") in
  let rpre = Re.compile (Re.Perl.re "^\\)") in
  let wsre = Re.compile (Re.Perl.re "^(\\s+)") in
  let rec tok rest =
    let len = String.length rest in
    (* [drop n] is [rest] without its first [n] characters. *)
    let drop n = String.sub rest n (len - n) in
    if rest = "" then [Tok_EOF]
    else if Re.execp lpre rest then Tok_LParen :: tok (drop 1)
    else if Re.execp rpre rest then Tok_RParen :: tok (drop 1)
    else if Re.execp addre rest then Tok_Plus :: tok (drop 1)
    else if Re.execp mulre rest then Tok_Mult :: tok (drop 1)
    else
      (* [exec_opt] matches and captures in one pass. *)
      match Re.exec_opt numre rest with
      | Some groups ->
        let num = Re.Group.get groups 1 in
        Tok_Int (int_of_string num) :: tok (drop (String.length num))
      | None ->
        (match Re.exec_opt wsre rest with
         | Some groups -> tok (drop (String.length (Re.Group.get groups 1)))
         | None -> failwith "lexing error")
  in
  tok input
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
open Lexer | ||
|
||
(* Types *)

(** Abstract syntax tree of an arithmetic expression. *)
type expr =
  | Int of int
  | Plus of expr * expr
  | Mult of expr * expr

(** Provided helper. [match_token toks tok] checks that the first token of
    [toks] equals the expected token [tok] and returns the remaining tokens.

    @raise Failure carrying only the text of [tok] when [toks] is empty, or
    a message naming the expected token, the remaining input and the token
    actually found when the head differs. *)
let match_token (toks : token list) (tok : token) : token list =
  match toks with
  | [] -> failwith (string_of_token tok)
  | next :: rest ->
    if next = tok then rest
    else
      failwith
        (Printf.sprintf "Expected %s from input %s, got %s"
           (string_of_token tok)
           (string_of_list string_of_token toks)
           (string_of_token next))

(** [lookahead toks] is the first token of [toks]; nothing is consumed.

    @raise Failure when [toks] is empty. *)
let lookahead toks =
  match toks with
  | [] -> failwith "Empty input to lookahead"
  | next :: _ -> next

(** [parser toks] parses the whole token list as an [S] expression and
    returns its AST.

    @raise Failure when anything other than exactly [Tok_EOF] is left over
    after the expression. *)
let rec parser (toks : token list) : expr =
  match parse_S toks with
  | [Tok_EOF], ast -> ast
  | _ -> failwith "did not reach EOF"

(** S -> M + S | M. Returns the unparsed tokens and the AST. *)
and parse_S toks =
  let rest, lhs = parse_M toks in
  if lookahead rest = Tok_Plus then
    (* Right-recursive: everything after the [+] is parsed as another S. *)
    let after_plus = match_token rest Tok_Plus in
    let rest', rhs = parse_S after_plus in
    (rest', Plus (lhs, rhs))
  else (rest, lhs)

(** M -> N * M | N. Returns the unparsed tokens and the AST. *)
and parse_M toks =
  let rest, lhs = parse_N toks in
  if lookahead rest = Tok_Mult then
    let after_mult = match_token rest Tok_Mult in
    let rest', rhs = parse_M after_mult in
    (rest', Mult (lhs, rhs))
  else (rest, lhs)

(** N -> n | (S). Returns the unparsed tokens and the AST.

    @raise Failure when the next token can start neither an integer nor a
    parenthesised expression. *)
and parse_N toks =
  match lookahead toks with
  | Tok_Int i -> (match_token toks (Tok_Int i), Int i)
  | Tok_LParen ->
    let inner_toks = match_token toks Tok_LParen in
    let after_inner, inner = parse_S inner_toks in
    (* The closing parenthesis is mandatory. *)
    let after_paren = match_token after_inner Tok_RParen in
    (after_paren, inner)
  | Tok_Mult | Tok_Plus | Tok_RParen | Tok_EOF -> failwith "parse_N failed"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
(library | ||
(name d8) | ||
(modules lexer parser interpreter) | ||
(libraries str)) | ||
(env | ||
(dev | ||
(flags (:standard -w -27-39-33-32)))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
open Parser | ||
|
||
(* Evaluator *)

(** [eval ast] is meant to turn the expression tree [ast] into an integer.
    Exercise stub: every call currently raises [Failure "unimplemented"]. *)
let rec eval (ast : expr) : int =
  raise (Failure "unimplemented")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
(** [eval ast] evaluates the expression tree [ast] and returns the resulting
    integer. *)
val eval : Parser.expr -> int |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
(* Type *)

(** Tokens of the arithmetic language: integer literals, [*], [+], the two
    parentheses, and an end-of-input marker. *)
type token =
  | Tok_Int of int
  | Tok_Mult
  | Tok_Plus
  | Tok_LParen
  | Tok_RParen
  | Tok_EOF

(** [string_of_token tok] is the source text of [tok]; [Tok_EOF] maps to the
    empty string. *)
let string_of_token = function
  | Tok_EOF -> ""
  | Tok_LParen -> "("
  | Tok_RParen -> ")"
  | Tok_Plus -> "+"
  | Tok_Mult -> "*"
  | Tok_Int n -> string_of_int n

(** [string_of_list conv lst] renders each element with [conv], separating
    consecutive elements with one space. The empty list gives [""]. *)
let rec string_of_list conv = function
  | [] -> ""
  | [last] -> conv last
  | first :: rest -> conv first ^ " " ^ string_of_list conv rest

(** Given source code, returns a token list.
    Exercise stub: every call currently raises [Failure "unimplemented"]. *)
let rec lexer (input : string) : token list =
  raise (Failure "unimplemented")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
(** Tokens of the arithmetic language: integer literals, the [*] and [+]
    operators, the two parentheses, and an end-of-input marker. *)
type token = | ||
| Tok_Int of int | ||
| Tok_Mult | ||
| Tok_Plus | ||
| Tok_LParen | ||
| Tok_RParen | ||
| Tok_EOF | ||
|
||
(** [lexer input] turns the source text [input] into a token list. *)
val lexer : string -> token list | ||
|
||
(** [string_of_token tok] is the source text of [tok]: the digits of a
    [Tok_Int], the operator or parenthesis character, and the empty string
    for [Tok_EOF]. *)
val string_of_token : token -> string | ||
|
||
(** [string_of_list conv lst] converts each element of [lst] with [conv] and
    joins the results with single spaces; the empty list gives [""]. *)
val string_of_list : ('a -> string) -> 'a list -> string |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
open Lexer | ||
|
||
(* Types *)

(** Abstract syntax tree of an arithmetic expression. *)
type expr =
  | Int of int
  | Plus of expr * expr
  | Mult of expr * expr

(** Provided helper. [match_token toks tok] checks that the first token of
    [toks] equals the expected token [tok] and returns the remaining tokens.

    @raise Failure carrying only the text of [tok] when [toks] is empty, or
    a message naming the expected token, the remaining input and the token
    actually found when the head differs. *)
let match_token (toks : token list) (tok : token) : token list =
  match toks with
  | [] -> failwith (string_of_token tok)
  | next :: rest ->
    if next = tok then rest
    else
      failwith
        (Printf.sprintf "Expected %s from input %s, got %s"
           (string_of_token tok)
           (string_of_list string_of_token toks)
           (string_of_token next))

(** [lookahead toks] is the first token of [toks]; nothing is consumed.

    @raise Failure when [toks] is empty. *)
let lookahead toks =
  match toks with
  | [] -> failwith "Empty input to lookahead"
  | next :: _ -> next

(** Parses a token list.
    Exercise stub: every call currently raises [Failure "unimplemented"]. *)
let rec parser (toks : token list) : expr =
  raise (Failure "unimplemented")

(** Parses the S rule.
    Exercise stub: every call currently raises [Failure "unimplemented"]. *)
and parse_S (toks : token list) : (token list * expr) =
  raise (Failure "unimplemented")

(** Parses the M rule.
    Exercise stub: every call currently raises [Failure "unimplemented"]. *)
and parse_M (toks : token list) : (token list * expr) =
  raise (Failure "unimplemented")

(** Parses the N rule.
    Exercise stub: every call currently raises [Failure "unimplemented"]. *)
and parse_N (toks : token list) : (token list * expr) =
  raise (Failure "unimplemented")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
(** Abstract syntax tree of an arithmetic expression: an integer literal, a
    sum of two sub-expressions, or a product of two sub-expressions. *)
type expr = | ||
| Int of int | ||
| Plus of expr * expr | ||
| Mult of expr * expr | ||
|
||
(** [parser toks] parses the token list [toks] into an expression tree. *)
val parser : Lexer.token list -> expr | ||
(** [parse_S toks] parses the S rule of the grammar; the result pairs a token
    list with the expression that was parsed. *)
val parse_S : Lexer.token list -> Lexer.token list * expr | ||
(** [parse_M toks] parses the M rule of the grammar; the result pairs a token
    list with the expression that was parsed. *)
val parse_M : Lexer.token list -> Lexer.token list * expr | ||
(** [parse_N toks] parses the N rule of the grammar; the result pairs a token
    list with the expression that was parsed. *)
val parse_N : Lexer.token list -> Lexer.token list * expr |