Skip to content
This repository was archived by the owner on Nov 11, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

159 changes: 159 additions & 0 deletions src/lexer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
use crate::token::{lookup_ident, Token, TokenType};

/// A hand-written lexer for the Monkey language.
///
/// Walks the source string once, producing one `Token` per call to
/// `next_token()`.
#[derive(Debug)]
pub struct Lexer {
    pub input: String,        // the source code
    pub position: usize,      // current position in input (points to current char)
    pub read_position: usize, // current reading position in input (after current char)
    pub ch: char,             // current char under examination
}

impl Lexer {
    /// Build a lexer over `input` and load the first character so that
    /// `next_token()` can be called immediately.
    pub fn new(input: &str) -> Self {
        let mut lexer = Lexer {
            input: input.to_string(),
            position: 0,
            read_position: 0,
            ch: '\0',
        };
        lexer.read_char();
        lexer
    }

    /// Yield the next character and advance our position.
    ///
    /// Positions are byte offsets into `input` (not char counts). This keeps
    /// the slicing in `read_identifier`/`read_number` valid for multi-byte
    /// UTF-8 characters and makes each advance O(1); the previous
    /// `chars().nth(..)` approach rescanned the input from the start on every
    /// call (O(n) per char, O(n^2) per file) and produced char indices that
    /// were then misused as byte indices when slicing.
    pub fn read_char(&mut self) {
        self.ch = self.char_at(self.read_position);
        self.position = self.read_position;
        // '\0' (the EOF marker) has len_utf8() == 1, so repeated calls at
        // EOF keep advancing by one, matching the original behavior.
        self.read_position += self.ch.len_utf8();
    }

    /// Similar to read_char() but does not advance positions.
    pub fn peek_char(&mut self) -> char {
        self.char_at(self.read_position)
    }

    /// Decode the character starting at byte offset `pos`, or '\0' when the
    /// offset is at or past the end of the input. `str::get` is boundary- and
    /// bounds-checked, so this never panics.
    fn char_at(&self, pos: usize) -> char {
        self.input
            .get(pos..)
            .and_then(|rest| rest.chars().next())
            .unwrap_or('\0')
    }

    /// Look at the current character and return the appropriate Token,
    /// consuming it (and, for two-char operators, the character after it).
    pub fn next_token(&mut self) -> Token {
        self.eat_whitespace();

        let tok = match self.ch {
            '=' => {
                if self.peek_char() == '=' {
                    self.read_char();
                    Token {
                        token_type: TokenType::Eq,
                        literal: String::from("=="),
                    }
                } else {
                    new_token(TokenType::Assign, self.ch)
                }
            }
            '+' => new_token(TokenType::Plus, self.ch),
            '-' => new_token(TokenType::Minus, self.ch),
            '!' => {
                if self.peek_char() == '=' {
                    self.read_char();
                    Token {
                        token_type: TokenType::Noteq,
                        literal: String::from("!="),
                    }
                } else {
                    new_token(TokenType::Bang, self.ch)
                }
            }
            '/' => new_token(TokenType::Slash, self.ch),
            '*' => new_token(TokenType::Asterisk, self.ch),
            '<' => new_token(TokenType::Lt, self.ch),
            '>' => new_token(TokenType::Gt, self.ch),
            '(' => new_token(TokenType::Lparen, self.ch),
            ')' => new_token(TokenType::Rparen, self.ch),
            '{' => new_token(TokenType::Lbrace, self.ch),
            '}' => new_token(TokenType::Rbrace, self.ch),
            ',' => new_token(TokenType::Comma, self.ch),
            ';' => new_token(TokenType::Semicolon, self.ch),
            '\0' => {
                // Early return to prevent advancing the read position
                return Token {
                    token_type: TokenType::Eof,
                    literal: String::new(),
                };
            }
            ch if is_letter(ch) => {
                let identifier = self.read_identifier();
                let token_type = lookup_ident(identifier);

                // read_identifier() already advanced past the last char,
                // so return early instead of calling read_char() again.
                return Token {
                    token_type,
                    literal: identifier.to_string(),
                };
            }
            ch if is_digit(ch) => {
                let literal = self.read_number().to_string();

                // read_number() already advanced past the last char.
                return Token {
                    token_type: TokenType::Int,
                    literal,
                };
            }
            _ => new_token(TokenType::Illegal, self.ch),
        };

        self.read_char();
        tok
    }

    /// Read a multi-char token made of letters/underscores (identifier or
    /// keyword); leaves the lexer positioned on the first non-letter char.
    fn read_identifier(&mut self) -> &str {
        let start = self.position;

        while is_letter(self.ch) {
            self.read_char();
        }

        &self.input[start..self.position]
    }

    /// Read a run of ASCII digits as one integer literal.
    fn read_number(&mut self) -> &str {
        // TODO: add support for floats, hex, octal and binary
        let start = self.position;

        while is_digit(self.ch) {
            self.read_char();
        }

        &self.input[start..self.position]
    }

    /// Skip the whitespace separating tokens.
    fn eat_whitespace(&mut self) {
        while matches!(self.ch, ' ' | '\t' | '\n' | '\r') {
            self.read_char();
        }
    }
}

/// Build a `Token` of the given type whose literal is the single
/// character `ch`.
fn new_token(token_type: TokenType, ch: char) -> Token {
    let literal = String::from(ch);
    Token { token_type, literal }
}

/// Helper to check if a char may appear in an identifier.
///
/// This function has a large impact on the language our interpreter will
/// support: only ASCII letters and '_' are accepted, so identifiers like
/// `foo_bar` work but Unicode letters do not.
fn is_letter(ch: char) -> bool {
    ch.is_ascii_alphabetic() || ch == '_'
}

/// Helper to check if a char is an ASCII decimal digit ('0'..='9').
fn is_digit(ch: char) -> bool {
    ch.is_ascii_digit()
}
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod lexer;
pub mod token;
59 changes: 59 additions & 0 deletions src/token.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/// Every category of token the Monkey lexer can produce.
///
/// The type is a fieldless enum, so `Clone`/`Copy`/`Eq`/`Hash` are derived:
/// token types can be passed by value, compared exactly, and used as map
/// keys without ceremony.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenType {
    // Special types
    Illegal, // for unsupported tokens
    Eof,     // mark the end of file

    // Identifiers + literals
    Ident, // add, foobar, x, y, ...
    Int,   // 1343456

    // Operators
    Assign,   // =
    Plus,     // +
    Minus,    // -
    Bang,     // !
    Asterisk, // *
    Slash,    // /
    Lt,       // <
    Gt,       // >
    Eq,       // ==
    Noteq,    // !=

    // Delimiters
    Comma,     // ,
    Semicolon, // ;

    Lparen, // (
    Rparen, // )
    Lbrace, // {
    Rbrace, // }

    // Keywords
    Function, // fn
    Let,      // let
    True,     // true
    False,    // false
    If,       // if
    Else,     // else
    Return,   // return
}

/// A single lexed token: its category plus the exact source text it was
/// read from (e.g. `Int` with literal `"1343456"`).
///
/// `Debug` and `PartialEq` are derived so tokens print in test failures
/// and can be compared directly; both bounds are satisfied by `TokenType`.
#[derive(Debug, PartialEq)]
pub struct Token {
    pub token_type: TokenType,
    pub literal: String,
}

/// Resolve an identifier's text to its token type.
///
/// Reserved words map to their dedicated keyword variants; any other text
/// lexes as a plain `Ident`.
pub fn lookup_ident(ident: &str) -> TokenType {
    match ident {
        "fn" => TokenType::Function,
        "let" => TokenType::Let,
        "if" => TokenType::If,
        "else" => TokenType::Else,
        "return" => TokenType::Return,
        "true" => TokenType::True,
        "false" => TokenType::False,
        _ => TokenType::Ident,
    }
}
122 changes: 122 additions & 0 deletions tests/lexer_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
use rusty_monkey::{
lexer::Lexer,
token::{Token, TokenType},
};

/// End-to-end lexer check: feed a small Monkey program through the lexer
/// and assert the exact (token type, literal) sequence it produces,
/// finishing with `Eof`.
#[test]
fn test_next_token() {
    let input = "
let five = 5;
let ten = 10;

let add = fn(x, y) {
x + y
};

let result = add(five, ten);
!-/*5;
5 < 10 > 5;

if (5 < 10) {
return true;
} else {
return false;
}

10 == 10;
10 != 9;
";

    // Expected (token_type, literal) pairs, in source order. The table is
    // the test: any reordering or omission here changes what passes.
    let tests = [
        (TokenType::Let, "let"),
        (TokenType::Ident, "five"),
        (TokenType::Assign, "="),
        (TokenType::Int, "5"),
        (TokenType::Semicolon, ";"),
        (TokenType::Let, "let"),
        (TokenType::Ident, "ten"),
        (TokenType::Assign, "="),
        (TokenType::Int, "10"),
        (TokenType::Semicolon, ";"),
        (TokenType::Let, "let"),
        (TokenType::Ident, "add"),
        (TokenType::Assign, "="),
        (TokenType::Function, "fn"),
        (TokenType::Lparen, "("),
        (TokenType::Ident, "x"),
        (TokenType::Comma, ","),
        (TokenType::Ident, "y"),
        (TokenType::Rparen, ")"),
        (TokenType::Lbrace, "{"),
        (TokenType::Ident, "x"),
        (TokenType::Plus, "+"),
        (TokenType::Ident, "y"),
        (TokenType::Rbrace, "}"),
        (TokenType::Semicolon, ";"),
        (TokenType::Let, "let"),
        (TokenType::Ident, "result"),
        (TokenType::Assign, "="),
        (TokenType::Ident, "add"),
        (TokenType::Lparen, "("),
        (TokenType::Ident, "five"),
        (TokenType::Comma, ","),
        (TokenType::Ident, "ten"),
        (TokenType::Rparen, ")"),
        (TokenType::Semicolon, ";"),
        (TokenType::Bang, "!"),
        (TokenType::Minus, "-"),
        (TokenType::Slash, "/"),
        (TokenType::Asterisk, "*"),
        (TokenType::Int, "5"),
        (TokenType::Semicolon, ";"),
        (TokenType::Int, "5"),
        (TokenType::Lt, "<"),
        (TokenType::Int, "10"),
        (TokenType::Gt, ">"),
        (TokenType::Int, "5"),
        (TokenType::Semicolon, ";"),
        (TokenType::If, "if"),
        (TokenType::Lparen, "("),
        (TokenType::Int, "5"),
        (TokenType::Lt, "<"),
        (TokenType::Int, "10"),
        (TokenType::Rparen, ")"),
        (TokenType::Lbrace, "{"),
        (TokenType::Return, "return"),
        (TokenType::True, "true"),
        (TokenType::Semicolon, ";"),
        (TokenType::Rbrace, "}"),
        (TokenType::Else, "else"),
        (TokenType::Lbrace, "{"),
        (TokenType::Return, "return"),
        (TokenType::False, "false"),
        (TokenType::Semicolon, ";"),
        (TokenType::Rbrace, "}"),
        (TokenType::Int, "10"),
        (TokenType::Eq, "=="),
        (TokenType::Int, "10"),
        (TokenType::Semicolon, ";"),
        (TokenType::Int, "10"),
        (TokenType::Noteq, "!="),
        (TokenType::Int, "9"),
        (TokenType::Semicolon, ";"),
        (TokenType::Eof, ""),
    ];

    let mut lexer = Lexer::new(input);

    // Pull one token per expectation; the index `i` in the failure message
    // pinpoints the first mismatching position in the table.
    for (i, (token_type, literal)) in tests.iter().enumerate() {
        let tok: Token = lexer.next_token();

        assert_eq!(
            &tok.token_type, token_type,
            "tests[{}] - token_type wrong. expected={:?}, got={:?}",
            i, token_type, tok.token_type
        );
        assert_eq!(
            &tok.literal, literal,
            "tests[{}] - literal wrong. expected={:?}, got={:?}",
            i, literal, tok.literal
        );
    }
}