Skip to content

Commit 54003da

Browse files
authored
Merge pull request #19 from mkpro118/enhance-scanner-and-parser
Add lexer recovery; improve string/number lexing; derive Eq/Hash
2 parents 67493e6 + 6e7f964 commit 54003da

File tree

15 files changed

+310
-104
lines changed

15 files changed

+310
-104
lines changed

src/core/parser/ast/mod.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,7 @@ pub struct BlockAttribute {
607607
/// assert!(q.is_simple());
608608
/// assert_eq!(q.as_simple().unwrap().text, "String");
609609
/// ```
610-
#[derive(Debug, Clone, AstContainerNode)]
610+
#[derive(Debug, Clone, PartialEq, Eq, Hash, AstContainerNode)]
611611
pub struct QualifiedIdent {
612612
/// The identifier parts (e.g., `["db", "VarChar"]`).
613613
pub parts: Vec<Ident>,
@@ -660,7 +660,7 @@ impl QualifiedIdent {
660660
/// assert_eq!(named.node_type(), "NamedType");
661661
/// assert_eq!(list.node_type(), "ListType");
662662
/// ```
663-
#[derive(Debug, Clone)]
663+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
664664
pub enum TypeRef {
665665
/// A named type (scalar, enum, or model reference).
666666
Named(NamedType),
@@ -713,7 +713,7 @@ impl AstNode for TypeRef {
713713
/// };
714714
/// assert_eq!(nt.node_type(), "NamedType");
715715
/// ```
716-
#[derive(Debug, Clone, AstContainerNode)]
716+
#[derive(Debug, Clone, PartialEq, Eq, Hash, AstContainerNode)]
717717
pub struct NamedType {
718718
/// The type name (may be qualified for built-ins).
719719
pub name: QualifiedIdent,
@@ -743,7 +743,7 @@ pub struct NamedType {
743743
/// let list = ListType { inner: Box::new(inner), span: sp((1,1),(1,6)) };
744744
/// assert_eq!(list.node_type(), "ListType");
745745
/// ```
746-
#[derive(Debug, Clone, AstContainerNode)]
746+
#[derive(Debug, Clone, PartialEq, Eq, Hash, AstContainerNode)]
747747
pub struct ListType {
748748
/// The inner type.
749749
pub inner: Box<TypeRef>,
@@ -1076,7 +1076,7 @@ impl AstNode for ObjectKey {}
10761076
/// An identifier with its source text and span.
10771077
///
10781078
/// Keeps the original identifier text and a span over it.
1079-
#[derive(Debug, Clone, AstLeafNode)]
1079+
#[derive(Debug, Clone, PartialEq, Eq, Hash, AstLeafNode)]
10801080
pub struct Ident {
10811081
/// The identifier text.
10821082
pub text: String,

src/core/parser/components/attributes.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -802,8 +802,6 @@ impl Parser<BlockAttribute> for BlockAttributeParser {
802802

803803
#[cfg(test)]
804804
mod tests {
805-
#![expect(clippy::unwrap_used)]
806-
807805
use crate::core::parser::ParserOptions;
808806
use crate::core::parser::VectorTokenStream;
809807
use crate::core::parser::ast::Arg;

src/core/parser/components/declarations.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -810,8 +810,6 @@ impl DeclarationWrapper<TypeDecl> for TypeDeclParser {
810810

811811
#[cfg(test)]
812812
mod tests {
813-
#![expect(clippy::unwrap_used)]
814-
815813
use crate::core::parser::components::declarations::{
816814
DatasourceParser, EnumParser, GeneratorParser, ModelParser,
817815
TypeDeclParser,

src/core/parser/components/doc_integration_tests.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
//! Integration tests for doc comment association across all parsers.
22
3-
#![expect(clippy::unwrap_used)]
4-
53
use crate::core::parser::components::attributes::{
64
BlockAttributeParser, FieldAttributeParser,
75
};
@@ -19,7 +17,7 @@ use crate::core::scanner::tokens::{Token, TokenType};
1917
/// Helper to create a `DocComment` token.
2018
fn doc_token(text: &str, line: u32) -> Token {
2119
Token::new(
22-
TokenType::DocComment(format!("///{text}")),
20+
TokenType::DocComment(text.to_string()),
2321
(line, 1),
2422
(line, 4 + u32::try_from(text.len()).unwrap_or(0)),
2523
)

src/core/parser/components/expressions.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,8 +1023,6 @@ impl Parser<Expr> for ExpressionParser {
10231023

10241024
#[cfg(test)]
10251025
mod tests {
1026-
#![expect(clippy::unwrap_used)]
1027-
10281026
use crate::core::parser::components::expressions::ExpressionParser;
10291027
use crate::core::parser::components::helpers::{
10301028
extract_doc_text, parse_leading_docs,
@@ -1442,7 +1440,7 @@ mod tests {
14421440
end_col: u32,
14431441
) -> Token {
14441442
Token::new(
1445-
TokenType::DocComment(format!("///{text}")),
1443+
TokenType::DocComment(text.to_string()),
14461444
(line, start_col),
14471445
(line, end_col),
14481446
)
@@ -1639,9 +1637,10 @@ mod tests {
16391637
assert!(result.is_some());
16401638
let docs = result.unwrap();
16411639
assert_eq!(docs.lines.len(), 3);
1642-
assert_eq!(docs.lines[0], ""); // Trimmed to empty
1640+
// Only a single leading space is removed; remaining spaces preserved
1641+
assert_eq!(docs.lines[0], " ");
16431642
assert_eq!(docs.lines[1], "Real content");
1644-
assert_eq!(docs.lines[2], ""); // Trimmed to empty
1643+
assert_eq!(docs.lines[2], " ");
16451644
}
16461645

16471646
#[test]
@@ -1701,7 +1700,7 @@ mod tests {
17011700
#[test]
17021701
fn extract_doc_text_with_prefix() {
17031702
let token = Token::new(
1704-
TokenType::DocComment("/// This is documentation".to_string()),
1703+
TokenType::DocComment(" This is documentation".to_string()),
17051704
(1, 1),
17061705
(1, 25),
17071706
);
@@ -1725,13 +1724,14 @@ mod tests {
17251724
#[test]
17261725
fn extract_doc_text_with_extra_whitespace() {
17271726
let token = Token::new(
1728-
TokenType::DocComment("/// This has extra spaces ".to_string()),
1727+
TokenType::DocComment("  This has extra spaces ".to_string()),
17291728
(1, 1),
17301729
(1, 31),
17311730
);
17321731

17331732
let result = extract_doc_text(&token);
1734-
assert_eq!(result, Some("This has extra spaces".to_string()));
1733+
// Only a single leading space is removed; preserve the rest
1734+
assert_eq!(result, Some(" This has extra spaces ".to_string()));
17351735
}
17361736

17371737
#[test]
@@ -1750,17 +1750,17 @@ mod tests {
17501750
fn docs_span_calculation() {
17511751
let tokens = vec![
17521752
Token::new(
1753-
TokenType::DocComment("/// First".to_string()),
1753+
TokenType::DocComment(" First".to_string()),
17541754
(1, 1),
17551755
(1, 10),
17561756
),
17571757
Token::new(
1758-
TokenType::DocComment("/// Second".to_string()),
1758+
TokenType::DocComment(" Second".to_string()),
17591759
(2, 1),
17601760
(2, 11),
17611761
),
17621762
Token::new(
1763-
TokenType::DocComment("/// Third".to_string()),
1763+
TokenType::DocComment(" Third".to_string()),
17641764
(3, 1),
17651765
(3, 10),
17661766
),

src/core/parser/components/helpers.rs

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,16 @@ pub(crate) fn span_from_to(a: &SymbolSpan, b: &SymbolSpan) -> SymbolSpan {
2626

2727
/// Extract documentation text from a `DocComment` token.
2828
///
29-
/// Normalizes the raw doc comment text by stripping an optional leading
30-
/// `///` prefix and trimming surrounding whitespace. Works for inputs with
31-
/// or without the `///` prefix.
29+
/// Given a `DocComment` token whose text is the content after the `///`
30+
/// marker, remove at most one leading space. Preserve all other whitespace.
3231
#[must_use]
3332
pub fn extract_doc_text(token: &Token) -> Option<String> {
3433
if let TokenType::DocComment(text) = token.r#type() {
35-
let s = text.strip_prefix("///").unwrap_or(text).trim();
36-
Some(s.to_string())
34+
if let Some(rest) = text.strip_prefix(' ') {
35+
Some(rest.to_string())
36+
} else {
37+
Some(text.to_string())
38+
}
3739
} else {
3840
None
3941
}
@@ -114,8 +116,6 @@ pub fn parse_leading_docs(stream: &mut dyn TokenStream) -> Option<Docs> {
114116

115117
#[cfg(test)]
116118
mod tests {
117-
#![expect(clippy::unwrap_used)]
118-
119119
use super::*;
120120
use crate::core::parser::stream::VectorTokenStream;
121121

@@ -125,14 +125,28 @@ mod tests {
125125

126126
#[test]
127127
fn extract_doc_text_variants() {
128-
let t = tok(TokenType::DocComment("/// hello".into()));
128+
let t = tok(TokenType::DocComment(" hello".into()));
129129
assert_eq!(extract_doc_text(&t).unwrap(), "hello");
130130
let t = tok(TokenType::DocComment("plain".into()));
131131
assert_eq!(extract_doc_text(&t).unwrap(), "plain");
132132
let t = tok(TokenType::Comment(" not-doc".into()));
133133
assert!(extract_doc_text(&t).is_none());
134134
}
135135

136+
#[test]
137+
fn extract_doc_text_removes_only_one_space() {
138+
let t = tok(TokenType::DocComment("  many spaces".into()));
139+
// Only the first leading space is removed; remaining preserved
140+
assert_eq!(extract_doc_text(&t).unwrap(), " many spaces");
141+
}
142+
143+
#[test]
144+
fn extract_doc_text_preserves_tabs_and_other_whitespace() {
145+
let t = tok(TokenType::DocComment("\tTabbed doc".into()));
146+
// Not a space prefix, so unchanged
147+
assert_eq!(extract_doc_text(&t).unwrap(), "\tTabbed doc");
148+
}
149+
136150
#[test]
137151
fn parse_leading_docs_none_and_some() {
138152
// None path (no docs)

src/core/parser/components/members.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -606,8 +606,6 @@ impl Parser<EnumMember> for EnumMemberParser {
606606

607607
#[cfg(test)]
608608
mod tests {
609-
#![expect(clippy::expect_used, clippy::unwrap_used)]
610-
611609
use crate::core::parser::ast::{
612610
EnumMember, Expr, HasNodeType, ModelMember, NamedType, TypeRef,
613611
};

src/core/parser/components/primitives.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,8 +300,6 @@ impl Parser<QualifiedIdent> for QualifiedIdentParser {
300300

301301
#[cfg(test)]
302302
mod tests {
303-
#![expect(clippy::unwrap_used)]
304-
305303
use crate::core::parser::components::primitives::{
306304
IdentParser, QualifiedIdentParser,
307305
};

src/core/parser/components/types.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,6 @@ impl Parser<TypeRef> for TypeRefParser {
336336

337337
#[cfg(test)]
338338
mod tests {
339-
#![expect(clippy::unwrap_used, clippy::expect_used)]
340-
341339
use crate::core::parser::components::types::{
342340
NamedTypeParser, TypeRefParser,
343341
};

src/core/parser/config.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,6 @@ impl<T> ParseResult<T> {
241241

242242
#[cfg(test)]
243243
mod config_tests {
244-
#![expect(clippy::unwrap_used)]
245244
use super::*;
246245
use crate::core::scanner::tokens::{SymbolLocation, SymbolSpan};
247246

0 commit comments

Comments
 (0)