
Commit daaf02e: update

DaviRain-Su committed Oct 10, 2023
1 parent 174de12 commit daaf02e
Showing 4 changed files with 98 additions and 63 deletions.
115 changes: 68 additions & 47 deletions src/lexer/mod.rs
@@ -1,6 +1,22 @@
 use crate::error::Error;
 use crate::token::token_type::TokenType;
 use crate::token::{token_type, Token};
+// use nom::bytes::complete::take_until;
+// use nom::bytes::complete::take_while;
+// use nom::character::complete::anychar;
+// use nom::character::complete::none_of;
+// use nom::character::complete::{alpha1, digit1, multispace0, one_of};
+// use nom::combinator::map;
+// use nom::multi::many0;
+// use nom::sequence::terminated;
+// use nom::sequence::{delimited, preceded, tuple};
+// use nom::{branch::alt, multi::many1};
+// use nom::{
+//     bytes::complete::tag,
+//     character::complete::{char, space0},
+// };
+// use nom::{bytes::complete::take, error::ErrorKind, IResult};
+use std::str;
 
 #[cfg(test)]
 mod tests;
@@ -70,66 +86,24 @@ impl<'a> Lexer<'a> {
                 if self.peek_char() == '=' {
                     let ch = self.ch;
                     self.read_char()?;
-                    let literal = String::from(ch) + &String::from(self.ch);
+                    let literal = format!("{}{}", ch, self.ch);
                     tok = Token::from_string(TokenType::EQ, literal);
                 } else {
-                    tok = Token::from_char(TokenType::ASSIGN, self.ch);
+                    tok = Token::from_char(token_type::lookup_char(self.ch), self.ch);
                 }
             }
-            '-' => {
-                tok = Token::from_char(TokenType::MINUS, self.ch);
-            }
             '!' => {
                 if self.peek_char() == '=' {
                     let ch = self.ch;
                     self.read_char()?;
                     let literal = String::from(ch) + &String::from(self.ch);
                     tok = Token::from_string(TokenType::NOTEQ, literal);
                 } else {
-                    tok = Token::from_char(TokenType::BANG, self.ch);
+                    tok = Token::from_char(token_type::lookup_char(self.ch), self.ch);
                 }
             }
-            '/' => {
-                tok = Token::from_char(TokenType::SLASH, self.ch);
-            }
-            '*' => {
-                tok = Token::from_char(TokenType::ASTERISK, self.ch);
-            }
-            '<' => {
-                tok = Token::from_char(TokenType::LT, self.ch);
-            }
-            '>' => {
-                tok = Token::from_char(TokenType::GT, self.ch);
-            }
-            ';' => {
-                tok = Token::from_char(TokenType::SEMICOLON, self.ch);
-            }
-            '(' => {
-                tok = Token::from_char(TokenType::LPAREN, self.ch);
-            }
-            ')' => {
-                tok = Token::from_char(TokenType::RPAREN, self.ch);
-            }
-            ',' => {
-                tok = Token::from_char(TokenType::COMMA, self.ch);
-            }
-            '+' => {
-                tok = Token::from_char(TokenType::PLUS, self.ch);
-            }
-            '{' => {
-                tok = Token::from_char(TokenType::LBRACE, self.ch);
-            }
-            '}' => {
-                tok = Token::from_char(TokenType::RBRACE, self.ch);
-            }
-            '[' => {
-                tok = Token::from_char(TokenType::LBRACKET, self.ch);
-            }
-            ']' => {
-                tok = Token::from_char(TokenType::RBRACKET, self.ch);
-            }
-            ':' => {
-                tok = Token::from_char(TokenType::COLON, self.ch);
+            ch if "+-/*<>;(),:{}[]".contains(ch) => {
+                tok = Token::from_char(token_type::lookup_char(ch), self.ch);
+            }
             '"' => {
                 tok = Token::from_string(TokenType::STRING, self.read_string()?);
@@ -166,6 +140,53 @@ impl<'a> Lexer<'a> {
 
         Ok(self.input[position..self.position].to_string())
     }
+
+    // fn parse_string(input: &str) -> IResult<&str, &str> {
+    //     // TODO: (need to open in the future)
+    //     let (input, _) = tag("\"")(input)?;
+    //     let (input, content) = take_while(|c| c != '"')(input)?;
+    //     let (input, _) = tag("\"")(input)?;
+    //     Ok((input, content))
+    // }
+
+    // fn read_string_v1(&mut self) -> anyhow::Result<String> {
+    //     match Lexer::parse_string(&self.input[self.position..]) {
+    //         Ok((remaining, content)) => {
+    //             println!("remain: {}, content: {}", remaining, content);
+    //             // update position
+    //             self.position += self.input[self.position..].len() - remaining.len();
+    //             Ok(content.to_string())
+    //         }
+    //         Err(_) => Err(anyhow::anyhow!("Failed to parse string")),
+    //     }
+    // }
+
+    // fn parse_token(input: &str) -> IResult<&str, Token> {
+    //     nom::branch::alt((
+    //         map(alpha1, |s: &str| {
+    //             let token_type = token_type::lookup_ident(s);
+    //             Token::from_string(token_type, s.to_string())
+    //         }),
+    //         map(digit1, |s: &str| {
+    //             Token::from_string(TokenType::INT, s.to_string())
+    //         }),
+    //         map(one_of("+-/*<>;(),:{}[]"), |ch: char| {
+    //             Token::from_char(token_type::lookup_char(ch), ch)
+    //         }),
+    //         map(
+    //             delimited(tag("\""), take_until("\""), tag("\"")),
+    //             |s: &str| Token::from_string(TokenType::STRING, s.to_string()),
+    //         ),
+    //     ))(input)
+    // }
+
+    // pub fn next_token_v1(&mut self) -> anyhow::Result<Token> {
+    //     let (_, token) = Self::parse_token(&self.input[self.position..])
+    //         .map_err(|e| anyhow::anyhow!("Failed to parse token -> {:?}", e.to_string()))?;
+    //     self.position += token.literal().len();
+    //     Ok(token)
+    // }
+
     /// Handle identifiers and keywords first. For both of these, the lexer needs to
     /// check whether the current character is a letter. If it is, it must read the
     /// rest of the identifier/keyword until it reaches a non-letter character. Once
     /// the identifier/keyword has been read, it still has to decide whether it is an
     /// identifier or a keyword, so that the correct …
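
The commented-out functions above park a nom-based rewrite of the tokenizer for later ("need to open in the future"). For reference, the same combinator idea compiles on its own roughly as follows: a minimal sketch assuming nom 7.x, with an illustrative Tok enum standing in for the crate's Token and TokenType.

use nom::{
    branch::alt,
    bytes::complete::{tag, take_until},
    character::complete::{alpha1, digit1, one_of},
    combinator::map,
    sequence::delimited,
    IResult,
};

// Illustrative stand-in for the crate's Token type.
#[derive(Debug, PartialEq)]
enum Tok {
    Ident(String),
    Int(String),
    Sym(char),
    Str(String),
}

// Same alternation as the parked parse_token: identifier, integer,
// single-char symbol, then a double-quoted string.
fn parse_tok(input: &str) -> IResult<&str, Tok> {
    alt((
        map(alpha1, |s: &str| Tok::Ident(s.to_string())),
        map(digit1, |s: &str| Tok::Int(s.to_string())),
        map(one_of("+-/*<>;(),:{}[]"), Tok::Sym),
        map(delimited(tag("\""), take_until("\""), tag("\"")), |s: &str| {
            Tok::Str(s.to_string())
        }),
    ))(input)
}

fn main() {
    // Each call consumes one token and returns the rest of the input.
    assert_eq!(
        parse_tok("let five = 5;"),
        Ok((" five = 5;", Tok::Ident("let".to_string())))
    );
    assert_eq!(
        parse_tok("\"hello\";"),
        Ok((";", Tok::Str("hello".to_string())))
    );
}

Note that none of these parsers skip leading whitespace, and take_until("\"") does not handle escaped quotes; next_token_v1 would need to deal with both before it could replace the hand-rolled loop.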
20 changes: 4 additions & 16 deletions src/lexer/tests.rs
@@ -119,26 +119,13 @@ if ( 5 < 10 ) {
         let mut l = Lexer::new(input)?;
         for (i, tt) in tests.iter().enumerate() {
             let tok = l.next_token()?;
-
-            println!("token = {:?}", tok);
-
             if tok.token_type() != tt.token_type() {
                 println!(
-                    "tests[{}] - token type wrong. expected = {:?}, \
+                    "tests[{}] - token type wrong. expected({}) = {:?}, \
                     got = {:?}
                     ",
                     i,
-                    tt.token_type(),
-                    tok.token_type()
-                );
-            }
-
-            if tok.token_type() != tt.token_type() {
-                println!(
-                    "tests[{}] - literal wrong. expected = {:?}, \
-                    got = {:?}
-                    ",
-                    i,
                     tt.literal(),
+                    tt.token_type(),
                     tok.token_type()
                 );
@@ -151,5 +138,6 @@
 #[test]
 fn test_test_next_token() {
     let ret = test_next_token();
-    println!("test_test_next_token: ret = {:?}", ret);
+    // assert!(ret.is_ok());
+    println!("{ret:?}");
 }
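
Since the loop above only prints mismatches and the assert!(ret.is_ok()) is left commented out, the test cannot actually fail. A fail-fast variant is a small change, sketched here against the same fixtures; it relies only on what the diff already shows (TokenType compared with != and printed with {:?}, i.e. PartialEq + Debug):

for (i, tt) in tests.iter().enumerate() {
    let tok = l.next_token()?;
    // Panic with the index and both values instead of printing and moving on.
    assert_eq!(tok.token_type(), tt.token_type(), "tests[{}] - token type wrong", i);
    assert_eq!(tok.literal(), tt.literal(), "tests[{}] - literal wrong", i);
}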
2 changes: 2 additions & 0 deletions src/main.rs
@@ -1,3 +1,5 @@
+#![allow(unused_imports)]
+
 #[macro_use]
 extern crate lazy_static;
 extern crate core;
24 changes: 24 additions & 0 deletions src/token/token_type.rs
@@ -116,3 +116,27 @@ pub fn lookup_ident(ident: &str) -> TokenType {
         None => TokenType::IDENT,
     }
 }
+
+// +-/*<>;(),:{}[]
+pub fn lookup_char(ch: char) -> TokenType {
+    match ch {
+        '/' => TokenType::SLASH,
+        '*' => TokenType::ASTERISK,
+        '<' => TokenType::LT,
+        '>' => TokenType::GT,
+        ';' => TokenType::SEMICOLON,
+        '(' => TokenType::LPAREN,
+        ')' => TokenType::RPAREN,
+        ',' => TokenType::COMMA,
+        '+' => TokenType::PLUS,
+        '{' => TokenType::LBRACE,
+        '}' => TokenType::RBRACE,
+        '[' => TokenType::LBRACKET,
+        ']' => TokenType::RBRACKET,
+        ':' => TokenType::COLON,
+        '-' => TokenType::MINUS,
+        '!' => TokenType::BANG,
+        '=' => TokenType::ASSIGN,
+        _ => TokenType::ILLEGAL,
+    }
+}
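
A quick check of the new helper's dispatch, sketched as a test that uses matches! so it assumes nothing about TokenType beyond the variants listed above; where it lives in the test module is up to the reader:

#[test]
fn lookup_char_dispatch() {
    use crate::token::token_type::{lookup_char, TokenType};
    // Every character in the lexer's catch-all arm must map to a real token.
    for ch in "+-/*<>;(),:{}[]".chars() {
        assert!(!matches!(lookup_char(ch), TokenType::ILLEGAL));
    }
    // '=' and '!' reach lookup_char through the lexer's else branches.
    assert!(matches!(lookup_char('='), TokenType::ASSIGN));
    assert!(matches!(lookup_char('!'), TokenType::BANG));
    // Anything outside the table is ILLEGAL.
    assert!(matches!(lookup_char('?'), TokenType::ILLEGAL));
}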
