Skip to content

Commit

Permalink
added disc 7
Browse files Browse the repository at this point in the history
  • Loading branch information
Mimsqueeze committed Oct 11, 2024
1 parent 0cdefe0 commit b6d97bb
Show file tree
Hide file tree
Showing 15 changed files with 353 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ This repository will have links to projects and discussions.
+ [Discussion 4](https://github.com/cmsc330fall24/fall2024/blob/main/discussions/discussion4.md)
+ [Discussion 5](https://github.com/cmsc330fall24/fall2024/blob/main/discussions/d5_nfa_dfa/README.md)
+ [Discussion 6](https://github.com/cmsc330fall24/fall2024/tree/main/discussions/d6_nfa_review_cfg)
+ [Discussion 7](https://github.com/cmsc330fall24/fall2024/tree/main/discussions/d7_parsing)

## Discussion Solutions
+ [Discussion 2 Solutions](https://github.com/cmsc330fall24/fall2024/blob/main/discussions/discussion2_sol.md)
Expand Down
3 changes: 3 additions & 0 deletions discussions/d7_parsing/.ocamlinit
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
open D8.Lexer
open D8.Parser
open D8.Interpreter
59 changes: 59 additions & 0 deletions discussions/d7_parsing/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Discussion 7 - Friday, October 11th

## Reminders

1. Project 3 due **Monday, October 14 @ 11:59 PM**

## Coding Exercise

- To go from source code to a running program, there are 3 steps (at least for our purposes):

- Tokenizing/Lexing (separating text into smaller tokens)
- Parsing (generating something meaningful from the tokens - an AST)
- Interpreting (evaluating the result of the AST)

- Consider the following grammar:

```
S -> M + S | M
M -> N * M | N
N -> n | (S)
* where n is any integer
```

- This grammar is right associative/recursive. Why did we provide a right associative grammar? What would you do if we didn't?

- What is the relative precedence of the + and \* operators here? How is it determined? How can we use CFGs to enforce precedence?

### Lexer

- Open `lexer.ml`.
- **NOTES:**
- Take a look at the variant type `token` we have defined
- Keep an index that keeps track of where we are in the string, and move forward as we keep tokenizing.
- It's probably also a good idea to define all the regexes up front and store them in variables at the top.

### Parser

- Open `parser.ml`.
- **NOTES:**
- Take a look at the variant type `expr` we have defined
- Use `let rec ... and` to write mutually recursive functions.
- `lookahead` returns the head of the list.
- `match_token` "consumes" the head of the list (provided that the expected token and the head of the list match).
- **IMPORTANT:**
- We're going to write a function named `parse_X` for each nonterminal `X` in our grammar.
- Each of these functions will parse (consume) some tokens, and return (1) the unparsed tokens and (2) the AST which corresponds to the parsed tokens.

### Interpreter

- Open `interpreter.ml`.
- **NOTES:**
- Our `eval` function will take in an AST created by `parser` and evaluate it into an integer
- Recursion is your friend!

## Resources & Additional Readings

- [Cliff's Notes on Grammars](https://bakalian.cs.umd.edu/assets/notes/grammars.pdf)
- [Anwar's Parsing Slides](https://bakalian.cs.umd.edu/assets/slides/16-parsing1.pdf)
1 change: 1 addition & 0 deletions discussions/d7_parsing/dune-project
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(lang dune 2.3)
13 changes: 13 additions & 0 deletions discussions/d7_parsing/sols/interpreter.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
open Parser

(* Evaluator *)

(* Reduces an arithmetic AST to the integer it denotes.
   [Int] nodes evaluate to their payload; [Plus] and [Mult] evaluate the
   left operand, then the right operand, and combine the two results. *)
let rec eval (ast : expr) : int =
  (* Evaluates both operands (left first) and applies [op] to the results. *)
  let combine op left right =
    let l = eval left in
    let r = eval right in
    op l r
  in
  match ast with
  | Int n -> n
  | Plus (a, b) -> combine ( + ) a b
  | Mult (a, b) -> combine ( * ) a b
53 changes: 53 additions & 0 deletions discussions/d7_parsing/sols/lexer.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
(* Type *)
(* Tokens of the arithmetic language handled by the lexer and parser. *)
type token =
| Tok_Int of int (* integer literal, carrying its value *)
| Tok_Mult (* the "*" operator *)
| Tok_Plus (* the "+" operator *)
| Tok_LParen (* "(" *)
| Tok_RParen (* ")" *)
| Tok_EOF (* end-of-input marker; the lexer puts it last in the token list *)

(* Renders a token as the source text it stands for.
   [Tok_Int] becomes its decimal representation and [Tok_EOF] becomes
   the empty string. *)
let string_of_token = function
  | Tok_EOF -> ""
  | Tok_LParen -> "("
  | Tok_RParen -> ")"
  | Tok_Plus -> "+"
  | Tok_Mult -> "*"
  | Tok_Int n -> string_of_int n


(* [string_of_list conv lst] converts every element of [lst] with [conv]
   and joins the results with single spaces.  The empty list yields "".
   Uses [String.concat] so the result is built in one pass instead of
   by repeated [^] concatenation. *)
let string_of_list conv lst =
  String.concat " " (List.map conv lst)

(* Given source code returns a token list.

   Scans [input] from left to right and returns its tokens in source
   order, always terminated by [Tok_EOF].  Whitespace between tokens
   (spaces, tabs, newlines, carriage returns) is skipped.  An integer
   literal may carry a leading '-'.

   Raises [Failure "lexing error"] when the text at the current position
   starts no token. *)
let lexer (input : string) : token list =
  let length = String.length input in

  (* Compile each pattern once per call instead of once per position. *)
  let re_lparen = Str.regexp "(" in
  let re_rparen = Str.regexp ")" in
  let re_plus = Str.regexp "\\+" in
  let re_mult = Str.regexp "\\*" in
  let re_int = Str.regexp "-?[0-9]+" in
  let re_space = Str.regexp "[ \t\n\r]+" in

  (* True when [re] matches [input] starting exactly at [pos]. *)
  let at re pos = Str.string_match re input pos in

  let rec tok pos =
    if pos >= length then [Tok_EOF]
    else if at re_lparen pos then Tok_LParen :: tok (pos + 1)
    else if at re_rparen pos then Tok_RParen :: tok (pos + 1)
    else if at re_plus pos then Tok_Plus :: tok (pos + 1)
    else if at re_mult pos then Tok_Mult :: tok (pos + 1)
    else if at re_int pos then
      (* [Str.matched_string] reads the state left by the last successful
         match, so it must be called before any other Str matching. *)
      let value = Str.matched_string input in
      Tok_Int (int_of_string value) :: tok (pos + String.length value)
    else if at re_space pos then
      (* Skip the whole whitespace run in one step. *)
      tok (Str.match_end ())
    else failwith "lexing error"
  in
  tok 0
57 changes: 57 additions & 0 deletions discussions/d7_parsing/sols/lexer_re.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
(* Type *)
(* Tokens of the arithmetic language handled by the lexer and parser. *)
type token =
| Tok_Int of int (* integer literal, carrying its value *)
| Tok_Mult (* the "*" operator *)
| Tok_Plus (* the "+" operator *)
| Tok_LParen (* "(" *)
| Tok_RParen (* ")" *)
| Tok_EOF (* end-of-input marker; string_of_token renders it as "" *)

(* Returns the textual form of a token: the decimal digits for an
   integer, the operator or parenthesis character for symbols, and the
   empty string for [Tok_EOF]. *)
let string_of_token tok =
  match tok with
  | Tok_Plus -> "+"
  | Tok_Mult -> "*"
  | Tok_LParen -> "("
  | Tok_RParen -> ")"
  | Tok_EOF -> ""
  | Tok_Int value -> string_of_int value

(* [string_of_list conv lst] renders each element of [lst] with [conv]
   and separates the pieces with one space each.  An empty list gives
   the empty string.  Built with [String.concat] rather than repeated
   [^], which avoids re-copying the accumulated string at every step. *)
let string_of_list conv lst =
  lst |> List.map conv |> String.concat " "

(* Given source code returns a token list.

   Repeatedly matches a token at the front of the remaining text and
   returns the tokens in source order, terminated by [Tok_EOF].  The
   parser relies on that terminator: it looks ahead after every operand
   and accepts only when exactly [Tok_EOF] is left.  Runs of whitespace
   are skipped.

   Raises [Failure "lexing error"] when the remaining text starts with
   something that is not a token. *)
let lexer (input : string) : token list =
  (* Compiled once per call.  Backslashes are doubled so that the OCaml
     string literal hands a real backslash to the Perl-style regex. *)
  let numre = Re.compile (Re.Perl.re "^(-?[0-9]+)") in
  let addre = Re.compile (Re.Perl.re "^\\+") in
  let mulre = Re.compile (Re.Perl.re "^\\*") in
  let lpre = Re.compile (Re.Perl.re "^\\(") in
  let rpre = Re.compile (Re.Perl.re "^\\)") in
  let wsre = Re.compile (Re.Perl.re "^(\\s+)") in

  (* Drops the first [n] characters of [s]. *)
  let drop n s = String.sub s n (String.length s - n) in

  let rec tok rest =
    if rest = "" then [Tok_EOF]
    else if Re.execp lpre rest then Tok_LParen :: tok (drop 1 rest)
    else if Re.execp rpre rest then Tok_RParen :: tok (drop 1 rest)
    else if Re.execp addre rest then Tok_Plus :: tok (drop 1 rest)
    else if Re.execp mulre rest then Tok_Mult :: tok (drop 1 rest)
    else if Re.execp numre rest then
      (* Group 1 is the whole literal, including an optional sign. *)
      let num = Re.Group.get (Re.exec numre rest) 1 in
      Tok_Int (int_of_string num) :: tok (drop (String.length num) rest)
    else if Re.execp wsre rest then
      (* Group 1 is the full whitespace run; skip all of it at once. *)
      let ws = Re.Group.get (Re.exec wsre rest) 1 in
      tok (drop (String.length ws) rest)
    else failwith "lexing error"
  in
  tok input
62 changes: 62 additions & 0 deletions discussions/d7_parsing/sols/parser.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
open Lexer

(* Types *)
(* Abstract syntax tree produced by the parser. *)
type expr =
| Int of int (* integer constant *)
| Plus of expr * expr (* addition of two subexpressions *)
| Mult of expr * expr (* multiplication of two subexpressions *)

(* Provided helper function - takes a token list and an expected token.
 * When the head of the list equals the expected token, returns the tail.
 * Otherwise raises [Failure]: on an empty list the message is just the
 * expected token's text; on a mismatch it names the expected token, the
 * remaining input and the token actually found. *)
let match_token (toks : token list) (tok : token) : token list =
  match toks with
  | head :: rest ->
    if head = tok then rest
    else
      failwith
        (Printf.sprintf "Expected %s from input %s, got %s"
           (string_of_token tok)
           (string_of_list string_of_token toks)
           (string_of_token head))
  | [] -> failwith (string_of_token tok)

(* Returns the first element of [toks] without consuming it.
   Raises [Failure "Empty input to lookahead"] on an empty list. *)
let lookahead toks =
  match toks with
  | [] -> failwith "Empty input to lookahead"
  | first :: _ -> first


(* Parses a token list.
   Entry point: parses one S expression and requires that exactly
   [Tok_EOF] is left over; otherwise raises [Failure "did not reach EOF"]. *)
let rec parser (toks : token list) : expr =
  match parse_S toks with
  | ([Tok_EOF], ast) -> ast
  | _ -> raise (Failure "did not reach EOF")

(* Parses the S rule:  S -> M + S | M
   Returns the unconsumed tokens paired with the AST. *)
and parse_S toks =
  let (rest, lhs) = parse_M toks in
  if lookahead rest = Tok_Plus then
    (* Recursing on S for the right operand makes '+' right associative. *)
    let (rest', rhs) = parse_S (match_token rest Tok_Plus) in
    (rest', Plus (lhs, rhs))
  else (rest, lhs)

(* Parses the M rule:  M -> N * M | N
   Returns the unconsumed tokens paired with the AST. *)
and parse_M toks =
  let (rest, lhs) = parse_N toks in
  if lookahead rest = Tok_Mult then
    let (rest', rhs) = parse_M (match_token rest Tok_Mult) in
    (rest', Mult (lhs, rhs))
  else (rest, lhs)

(* Parses the N rule:  N -> n | (S)
   Raises [Failure "parse_N failed"] when the next token is neither an
   integer nor an opening parenthesis. *)
and parse_N toks =
  match lookahead toks with
  | Tok_LParen ->
    let after_open = match_token toks Tok_LParen in
    let (after_inner, inner) = parse_S after_open in
    (* The parenthesised expression must be closed by ')'. *)
    let after_close = match_token after_inner Tok_RParen in
    (after_close, inner)
  | Tok_Int i -> (match_token toks (Tok_Int i), Int i)
  | _ -> failwith "parse_N failed"
7 changes: 7 additions & 0 deletions discussions/d7_parsing/src/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
(library
(name d8)
(modules lexer parser interpreter)
(libraries str))
(env
(dev
(flags (:standard -w -27-39-33-32))))
6 changes: 6 additions & 0 deletions discussions/d7_parsing/src/interpreter.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
open Parser

(* Evaluator *)

(* Evaluates an AST to an integer.
   TODO (exercise): not implemented yet - this skeleton always raises
   [Failure "unimplemented"]. *)
let rec eval (ast : expr) : int =
failwith "unimplemented"
1 change: 1 addition & 0 deletions discussions/d7_parsing/src/interpreter.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(** [eval ast] evaluates the arithmetic expression [ast] to an integer. *)
val eval : Parser.expr -> int
26 changes: 26 additions & 0 deletions discussions/d7_parsing/src/lexer.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
(* Type *)
(* Tokens of the arithmetic language handled by the lexer and parser. *)
type token =
| Tok_Int of int (* integer literal, carrying its value *)
| Tok_Mult (* the "*" operator *)
| Tok_Plus (* the "+" operator *)
| Tok_LParen (* "(" *)
| Tok_RParen (* ")" *)
| Tok_EOF (* end-of-input marker; string_of_token renders it as "" *)

(* Converts a token back to text: integers print as decimal numbers,
   operators and parentheses as their single character, and [Tok_EOF]
   as the empty string. *)
let string_of_token = function
  | Tok_Int n -> string_of_int n
  | Tok_Plus -> "+"
  | Tok_Mult -> "*"
  | Tok_RParen -> ")"
  | Tok_LParen -> "("
  | Tok_EOF -> ""

(* [string_of_list conv lst] maps [conv] over [lst] and joins the
   resulting strings with a single space between neighbours; the empty
   list produces "".  [String.concat] builds the result in one pass,
   unlike chained [^] which copies the growing string each time. *)
let string_of_list conv lst =
  let pieces = List.map conv lst in
  String.concat " " pieces

(* Given source code returns a token list. *)
(* TODO (exercise): not implemented yet - this skeleton always raises
   [Failure "unimplemented"]. *)
let rec lexer (input : string) : token list =
failwith "unimplemented"
13 changes: 13 additions & 0 deletions discussions/d7_parsing/src/lexer.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
(** Tokens of the arithmetic language handled by the lexer and parser. *)
type token =
| Tok_Int of int (** integer literal, carrying its value *)
| Tok_Mult (** the "*" operator *)
| Tok_Plus (** the "+" operator *)
| Tok_LParen (** "(" *)
| Tok_RParen (** ")" *)
| Tok_EOF (** end-of-input marker *)

(** [lexer input] turns source text into its list of tokens. *)
val lexer : string -> token list

(** [string_of_token tok] is the textual form of [tok]; [Tok_EOF] maps to
    the empty string. *)
val string_of_token : token -> string

(** [string_of_list conv lst] converts each element of [lst] with [conv]
    and joins the results with single spaces; the empty list gives [""]. *)
val string_of_list : ('a -> string) -> 'a list -> string
42 changes: 42 additions & 0 deletions discussions/d7_parsing/src/parser.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
open Lexer

(* Types *)
(* Abstract syntax tree that the parser is meant to produce. *)
type expr =
| Int of int (* integer constant *)
| Plus of expr * expr (* addition of two subexpressions *)
| Mult of expr * expr (* multiplication of two subexpressions *)

(* Provided helper function - takes a token list and an expected token.
 * Consumes the head of the list when it equals the expected token and
 * returns the tail.  Raises [Failure] otherwise: for an empty list the
 * message is the expected token's text alone; for a mismatch it reports
 * the expected token, the remaining input and the token found. *)
let match_token (toks : token list) (tok : token) : token list =
  (* Builds the mismatch message for the token actually at the head. *)
  let mismatch found =
    Printf.sprintf "Expected %s from input %s, got %s"
      (string_of_token tok)
      (string_of_list string_of_token toks)
      (string_of_token found)
  in
  match toks with
  | [] -> failwith (string_of_token tok)
  | found :: rest -> if found = tok then rest else failwith (mismatch found)

(* Peeks at the head of a list without removing it.
   Raises [Failure "Empty input to lookahead"] when the list is empty. *)
let lookahead = function
  | next :: _ -> next
  | [] -> failwith "Empty input to lookahead"



(* Parses a token list. *)
(* TODO (exercise): entry point; currently always raises
   [Failure "unimplemented"]. *)
let rec parser (toks : token list) : expr =
failwith "unimplemented"

(* Parses the S rule. *)
(* Grammar: S -> M + S | M.  Should return the unparsed tokens together
   with the AST for the tokens it consumed.  TODO (exercise). *)
and parse_S (toks : token list) : (token list * expr) =
failwith "unimplemented"

(* Parses the M rule. *)
(* Grammar: M -> N * M | N.  Should return the unparsed tokens together
   with the AST for the tokens it consumed.  TODO (exercise). *)
and parse_M (toks : token list) : (token list * expr) =
failwith "unimplemented"

(* Parses the N rule. *)
(* Grammar: N -> n | (S), where n is any integer.  Should return the
   unparsed tokens together with the AST for the tokens it consumed.
   TODO (exercise). *)
and parse_N (toks : token list) : (token list * expr) =
failwith "unimplemented"
9 changes: 9 additions & 0 deletions discussions/d7_parsing/src/parser.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
(** Abstract syntax tree for arithmetic expressions. *)
type expr =
| Int of int (** integer constant *)
| Plus of expr * expr (** addition of two subexpressions *)
| Mult of expr * expr (** multiplication of two subexpressions *)

(** [parser toks] parses a complete token list into an AST. *)
val parser : Lexer.token list -> expr
(** [parse_S toks] parses the rule [S -> M + S | M] and returns the
    unparsed tokens paired with the AST for the consumed tokens. *)
val parse_S : Lexer.token list -> Lexer.token list * expr
(** [parse_M toks] parses the rule [M -> N * M | N] and returns the
    unparsed tokens paired with the AST for the consumed tokens. *)
val parse_M : Lexer.token list -> Lexer.token list * expr
(** [parse_N toks] parses the rule [N -> n | (S)], where [n] is any
    integer, and returns the unparsed tokens paired with the AST for the
    consumed tokens. *)
val parse_N : Lexer.token list -> Lexer.token list * expr

0 comments on commit b6d97bb

Please sign in to comment.