Merge pull request #19 from mkpro118/enhance-scanner-and-parser

mkpro118 · web-flow · commit 54003da92a57 · 2025-09-14T04:07:12.000-07:00
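Add lexer recovery; improve string/number lexing; derive PartialEq/Eq/Hash on AST type nodes; DocComment tokens now carry only the text after `///` (extract_doc_text strips at most one leading space)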
diff --git a/src/core/parser/ast/mod.rs b/src/core/parser/ast/mod.rs
@@ -607,7 +607,7 @@ pub struct BlockAttribute {
 /// assert!(q.is_simple());
 /// assert_eq!(q.as_simple().unwrap().text, "String");
 /// ```
-#[derive(Debug, Clone, AstContainerNode)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, AstContainerNode)]
 pub struct QualifiedIdent {
     /// The identifier parts (e.g., `["db", "VarChar"]`).
     pub parts: Vec<Ident>,
@@ -660,7 +660,7 @@ impl QualifiedIdent {
 /// assert_eq!(named.node_type(), "NamedType");
 /// assert_eq!(list.node_type(), "ListType");
 /// ```
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum TypeRef {
     /// A named type (scalar, enum, or model reference).
     Named(NamedType),
@@ -713,7 +713,7 @@ impl AstNode for TypeRef {
 /// };
 /// assert_eq!(nt.node_type(), "NamedType");
 /// ```
-#[derive(Debug, Clone, AstContainerNode)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, AstContainerNode)]
 pub struct NamedType {
     /// The type name (may be qualified for built-ins).
     pub name: QualifiedIdent,
@@ -743,7 +743,7 @@ pub struct NamedType {
 /// let list = ListType { inner: Box::new(inner), span: sp((1,1),(1,6)) };
 /// assert_eq!(list.node_type(), "ListType");
 /// ```
-#[derive(Debug, Clone, AstContainerNode)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, AstContainerNode)]
 pub struct ListType {
     /// The inner type.
     pub inner: Box<TypeRef>,
@@ -1076,7 +1076,7 @@ impl AstNode for ObjectKey {}
 /// An identifier with its source text and span.
 ///
 /// Keeps the original identifier text and a span over it.
-#[derive(Debug, Clone, AstLeafNode)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, AstLeafNode)]
 pub struct Ident {
     /// The identifier text.
     pub text: String,
diff --git a/src/core/parser/components/attributes.rs b/src/core/parser/components/attributes.rs
@@ -802,8 +802,6 @@ impl Parser<BlockAttribute> for BlockAttributeParser {
 
 #[cfg(test)]
 mod tests {
-    #![expect(clippy::unwrap_used)]
-
     use crate::core::parser::ParserOptions;
     use crate::core::parser::VectorTokenStream;
     use crate::core::parser::ast::Arg;
diff --git a/src/core/parser/components/declarations.rs b/src/core/parser/components/declarations.rs
@@ -810,8 +810,6 @@ impl DeclarationWrapper<TypeDecl> for TypeDeclParser {
 
 #[cfg(test)]
 mod tests {
-    #![expect(clippy::unwrap_used)]
-
     use crate::core::parser::components::declarations::{
         DatasourceParser, EnumParser, GeneratorParser, ModelParser,
         TypeDeclParser,
diff --git a/src/core/parser/components/doc_integration_tests.rs b/src/core/parser/components/doc_integration_tests.rs
@@ -1,7 +1,5 @@
 //! Integration tests for doc comment association across all parsers.
 
-#![expect(clippy::unwrap_used)]
-
 use crate::core::parser::components::attributes::{
     BlockAttributeParser, FieldAttributeParser,
 };
@@ -19,7 +17,7 @@ use crate::core::scanner::tokens::{Token, TokenType};
 /// Helper to create a `DocComment` token.
 fn doc_token(text: &str, line: u32) -> Token {
     Token::new(
-        TokenType::DocComment(format!("///{text}")),
+        TokenType::DocComment(text.to_string()),
         (line, 1),
         (line, 4 + u32::try_from(text.len()).unwrap_or(0)),
     )
diff --git a/src/core/parser/components/expressions.rs b/src/core/parser/components/expressions.rs
@@ -1023,8 +1023,6 @@ impl Parser<Expr> for ExpressionParser {
 
 #[cfg(test)]
 mod tests {
-    #![expect(clippy::unwrap_used)]
-
     use crate::core::parser::components::expressions::ExpressionParser;
     use crate::core::parser::components::helpers::{
         extract_doc_text, parse_leading_docs,
@@ -1442,7 +1440,7 @@ mod tests {
         end_col: u32,
     ) -> Token {
         Token::new(
-            TokenType::DocComment(format!("///{text}")),
+            TokenType::DocComment(text.to_string()),
             (line, start_col),
             (line, end_col),
         )
@@ -1639,9 +1637,10 @@ mod tests {
         assert!(result.is_some());
         let docs = result.unwrap();
         assert_eq!(docs.lines.len(), 3);
-        assert_eq!(docs.lines[0], ""); // Trimmed to empty
+        // Only a single leading space is removed; remaining spaces preserved
+        assert_eq!(docs.lines[0], "  ");
         assert_eq!(docs.lines[1], "Real content");
-        assert_eq!(docs.lines[2], ""); // Trimmed to empty
+        assert_eq!(docs.lines[2], "    ");
     }
 
     #[test]
@@ -1701,7 +1700,7 @@ mod tests {
     #[test]
     fn extract_doc_text_with_prefix() {
         let token = Token::new(
-            TokenType::DocComment("/// This is documentation".to_string()),
+            TokenType::DocComment(" This is documentation".to_string()),
             (1, 1),
             (1, 25),
         );
@@ -1725,13 +1724,14 @@ mod tests {
     #[test]
     fn extract_doc_text_with_extra_whitespace() {
         let token = Token::new(
-            TokenType::DocComment("///   This has extra spaces   ".to_string()),
+            TokenType::DocComment("   This has extra spaces   ".to_string()),
             (1, 1),
             (1, 31),
         );
 
         let result = extract_doc_text(&token);
-        assert_eq!(result, Some("This has extra spaces".to_string()));
+        // Only a single leading space is removed; preserve the rest
+        assert_eq!(result, Some("  This has extra spaces   ".to_string()));
     }
 
     #[test]
@@ -1750,17 +1750,17 @@ mod tests {
     fn docs_span_calculation() {
         let tokens = vec![
             Token::new(
-                TokenType::DocComment("/// First".to_string()),
+                TokenType::DocComment(" First".to_string()),
                 (1, 1),
                 (1, 10),
             ),
             Token::new(
-                TokenType::DocComment("/// Second".to_string()),
+                TokenType::DocComment(" Second".to_string()),
                 (2, 1),
                 (2, 11),
             ),
             Token::new(
-                TokenType::DocComment("/// Third".to_string()),
+                TokenType::DocComment(" Third".to_string()),
                 (3, 1),
                 (3, 10),
             ),
diff --git a/src/core/parser/components/helpers.rs b/src/core/parser/components/helpers.rs
@@ -26,14 +26,16 @@ pub(crate) fn span_from_to(a: &SymbolSpan, b: &SymbolSpan) -> SymbolSpan {
 
 /// Extract documentation text from a `DocComment` token.
 ///
-/// Normalizes the raw doc comment text by stripping an optional leading
-/// `///` prefix and trimming surrounding whitespace. Works for inputs with
-/// or without the `///` prefix.
+/// Given a `DocComment` token whose text is the content after the `///`
+/// marker, remove at most one leading space. Preserve all other whitespace.
 #[must_use]
 pub fn extract_doc_text(token: &Token) -> Option<String> {
     if let TokenType::DocComment(text) = token.r#type() {
-        let s = text.strip_prefix("///").unwrap_or(text).trim();
-        Some(s.to_string())
+        if let Some(rest) = text.strip_prefix(' ') {
+            Some(rest.to_string())
+        } else {
+            Some(text.to_string())
+        }
     } else {
         None
     }
@@ -114,8 +116,6 @@ pub fn parse_leading_docs(stream: &mut dyn TokenStream) -> Option<Docs> {
 
 #[cfg(test)]
 mod tests {
-    #![expect(clippy::unwrap_used)]
-
     use super::*;
     use crate::core::parser::stream::VectorTokenStream;
 
@@ -125,14 +125,28 @@ mod tests {
 
     #[test]
     fn extract_doc_text_variants() {
-        let t = tok(TokenType::DocComment("/// hello".into()));
+        let t = tok(TokenType::DocComment(" hello".into()));
         assert_eq!(extract_doc_text(&t).unwrap(), "hello");
         let t = tok(TokenType::DocComment("plain".into()));
         assert_eq!(extract_doc_text(&t).unwrap(), "plain");
         let t = tok(TokenType::Comment(" not-doc".into()));
         assert!(extract_doc_text(&t).is_none());
     }
 
+    #[test]
+    fn extract_doc_text_removes_only_one_space() {
+        let t = tok(TokenType::DocComment("   many spaces".into()));
+        // Only the first leading space is removed; remaining preserved
+        assert_eq!(extract_doc_text(&t).unwrap(), "  many spaces");
+    }
+
+    #[test]
+    fn extract_doc_text_preserves_tabs_and_other_whitespace() {
+        let t = tok(TokenType::DocComment("\tTabbed doc".into()));
+        // Not a space prefix, so unchanged
+        assert_eq!(extract_doc_text(&t).unwrap(), "\tTabbed doc");
+    }
+
     #[test]
     fn parse_leading_docs_none_and_some() {
         // None path (no docs)
diff --git a/src/core/parser/components/members.rs b/src/core/parser/components/members.rs
@@ -606,8 +606,6 @@ impl Parser<EnumMember> for EnumMemberParser {
 
 #[cfg(test)]
 mod tests {
-    #![expect(clippy::expect_used, clippy::unwrap_used)]
-
     use crate::core::parser::ast::{
         EnumMember, Expr, HasNodeType, ModelMember, NamedType, TypeRef,
     };
diff --git a/src/core/parser/components/primitives.rs b/src/core/parser/components/primitives.rs
@@ -300,8 +300,6 @@ impl Parser<QualifiedIdent> for QualifiedIdentParser {
 
 #[cfg(test)]
 mod tests {
-    #![expect(clippy::unwrap_used)]
-
     use crate::core::parser::components::primitives::{
         IdentParser, QualifiedIdentParser,
     };
diff --git a/src/core/parser/components/types.rs b/src/core/parser/components/types.rs
@@ -336,8 +336,6 @@ impl Parser<TypeRef> for TypeRefParser {
 
 #[cfg(test)]
 mod tests {
-    #![expect(clippy::unwrap_used, clippy::expect_used)]
-
     use crate::core::parser::components::types::{
         NamedTypeParser, TypeRefParser,
     };
diff --git a/src/core/parser/config.rs b/src/core/parser/config.rs
@@ -241,7 +241,6 @@ impl<T> ParseResult<T> {
 
 #[cfg(test)]
 mod config_tests {
-    #![expect(clippy::unwrap_used)]
     use super::*;
     use crate::core::scanner::tokens::{SymbolLocation, SymbolSpan};
 
diff --git a/src/core/parser/progress.rs b/src/core/parser/progress.rs
@@ -250,7 +250,6 @@ impl TokenStream for ObservedTokenStream<'_> {
 
 #[cfg(test)]
 mod tests {
-    #![expect(clippy::expect_used)]
     use super::*;
     use crate::core::parser::stream::VectorTokenStream;
 
diff --git a/src/core/parser/schema_parser.rs b/src/core/parser/schema_parser.rs
@@ -546,7 +546,6 @@ struct Block {
 
 #[cfg(test)]
 mod tests {
-    #![expect(clippy::unwrap_used)]
     use super::*;
     use std::sync::{
         Arc,
diff --git a/src/core/parser/stream.rs b/src/core/parser/stream.rs
@@ -399,8 +399,6 @@ impl<T: ?Sized + TokenStream> TokenStreamExt for T {}
 
 #[cfg(test)]
 mod tests {
-    #![expect(clippy::expect_used, clippy::unwrap_used)]
-
     use super::*;
 
     fn create_test_token(token_type: TokenType) -> Token {
diff --git a/src/core/scanner/lexer.rs b/src/core/scanner/lexer.rs
diff --git a/src/core/scanner/tokens.rs b/src/core/scanner/tokens.rs