Certora · pickx · Sep 28, 2022 · Sep 21, 2022 · Sep 21, 2022 · Sep 21, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,8 @@
 # Changelog
+## [0.4.1] - 2022-09-28
+### Fixed
+- The parser now preserves each line's terminator kind instead of ignoring it, so input with CRLF line endings is parsed correctly.
+
 ## [0.4.0] - 2022-09-21
 ### Changed
 - Changed all references from "NatSpec" to the new name, "CVLDoc"

diff --git a/Cargo.toml b/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "cvldoc_parser"
+name = "cvldoc_parser_core"
 version = "0.4.0"
 edition = "2021"
 

diff --git a/src/diagnostics.rs b/src/diagnostics.rs
@@ -1,4 +1,4 @@
-use crate::{AssociatedElement, CvlDoc, Tag, DocData};
+use crate::{AssociatedElement, CvlDoc, DocData, Tag};
 use lsp_types::{Diagnostic, DiagnosticSeverity, Range};
 
 impl AssociatedElement {
@@ -42,15 +42,16 @@ impl CvlDoc {
             const WARNING: DiagnosticSeverity = DiagnosticSeverity::WARNING;
             const ERROR: DiagnosticSeverity = DiagnosticSeverity::ERROR;
 
-            let mut add = |message: String, diag_range: Option<Range>, severity: DiagnosticSeverity| {
-                let diag = Diagnostic {
-                    range: diag_range.unwrap_or(self.range),
-                    severity: Some(severity),
-                    message,
-                    ..Default::default()
+            let mut add =
+                |message: String, diag_range: Option<Range>, severity: DiagnosticSeverity| {
+                    let diag = Diagnostic {
+                        range: diag_range.unwrap_or(self.range),
+                        severity: Some(severity),
+                        message,
+                        ..Default::default()
+                    };
+                    warnings.push(diag);
                 };
-                warnings.push(diag);
-            };
 
             if let Some(associated) = associated {
                 if tags.iter().all(|tag| tag.kind != Tag::Notice) {
@@ -80,7 +81,7 @@ impl CvlDoc {
                     }
                 }
             } else {
-                let error_desc = "no associated element for NatSpec documentation block".into();
+                let error_desc = "no associated element for CVLDoc documentation block".into();
                 add(error_desc, None, ERROR);
             }
 

diff --git a/src/lib.rs b/src/lib.rs
@@ -200,6 +200,18 @@ impl Tag {
             _ => None,
         }
     }
+
+    pub(crate) fn len(&self) -> usize {
+        let len_without_ampersat = match self {
+            Tag::Dev => 3,
+            Tag::Title | Tag::Param => 5,
+            Tag::Notice | Tag::Return => 6,
+            Tag::Formula => 7,
+            Tag::Unexpected(s) => s.len(),
+        };
+
+        len_without_ampersat + 1
+    }
 }
 
 impl From<&str> for Tag {
@@ -219,6 +231,24 @@ impl From<&str> for Tag {
     }
 }
 
+impl From<String> for Tag {
+    fn from(mut s: String) -> Self {
+        if s.starts_with('@') {
+            s.remove(0);
+        }
+
+        match s.as_str() {
+            "title" => Tag::Title,
+            "notice" => Tag::Notice,
+            "dev" => Tag::Dev,
+            "param" => Tag::Param,
+            "return" => Tag::Return,
+            "formula" => Tag::Formula,
+            _ => Tag::Unexpected(s),
+        }
+    }
+}
+
 impl Display for AssociatedElement {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         let kind = match self {

diff --git a/src/parse/associated_element.rs b/src/parse/associated_element.rs
@@ -175,10 +175,10 @@ fn invariant_decl<'src>() -> BoxedParser<'src, char, AssociatedElement, Simple<c
         "using",
     ]
     .map(just);
-    let new_natspec_start = just("///").or(just("/**").then_ignore(none_of('/')));
+    let new_cvldoc_start = just("///").or(just("/**").then_ignore(none_of('/')));
     let end_at_stopping_word = end()
         .or(choice(stopping).ignored())
-        .or(new_natspec_start.ignored())
+        .or(new_cvldoc_start.ignored())
         .to((None, None))
         .rewind();
 

diff --git a/src/parse/builder.rs b/src/parse/builder.rs
@@ -1,4 +1,6 @@
-use crate::util::span_to_range::{RangeConverter, Span, Spanned};
+use super::terminated_line::TerminatedLine;
+use crate::parse::terminated_line::JoinToString;
+use crate::util::span_to_range::{RangeConverter, Span};
 use crate::{AssociatedElement, CvlDoc, DocData, DocumentationTag, Tag};
 use color_eyre::eyre::{bail, ensure, eyre};
 use color_eyre::Report;
@@ -11,7 +13,7 @@ pub enum CvlDocBuilder {
         span: Span,
     },
     Documentation {
-        spanned_body: Vec<Spanned<String>>,
+        spanned_body: Vec<(TerminatedLine, Span)>,
         associated: Option<AssociatedElement>,
         span: Span,
     },
@@ -43,7 +45,7 @@ impl CvlDocBuilder {
                 span,
             } => {
                 ensure!(!spanned_body.is_empty(), "documentation has no body");
-                let tags = CvlDocBuilder::process_doc_body(&spanned_body, converter.clone());
+                let tags = CvlDocBuilder::process_doc_body(spanned_body, converter.clone());
 
                 let cvl_doc = CvlDoc {
                     raw: CvlDocBuilder::raw_data(src, span.clone())?,
@@ -59,97 +61,92 @@ impl CvlDocBuilder {
         }
     }
 
+    fn tag_from_content(content: &[char]) -> Option<Tag> {
+        let mut content_chars = content.iter().copied();
+
+        match content_chars.next() {
+            Some('@') => {
+                let tag_literal: String = content_chars
+                    .take_while(|c| !c.is_ascii_whitespace())
+                    .collect();
+                Some(tag_literal.into())
+            }
+            _ => None,
+        }
+    }
+
     fn process_doc_body(
-        spanned_body: &[(String, Span)],
+        spanned_body: Vec<(TerminatedLine, Span)>,
         converter: RangeConverter,
     ) -> Vec<DocumentationTag> {
         let mut tags = Vec::new();
 
         let mut cur_tag = Tag::default();
-        let mut cur_desc = String::new();
+        let mut cur_desc: Vec<TerminatedLine> = Vec::new();
         let mut cur_span = None;
 
-        let whitespace = &[' ', '\t'];
-
-        for (line, line_span) in spanned_body {
-            let line = line.trim();
-            if line.is_empty() {
-                continue;
-            }
+        static PADDING: &[char] = &[' ', '\t'];
 
-            let not_finished_with_previous_tag = !cur_desc.is_empty();
+        for (mut line, line_span) in spanned_body.into_iter() {
+            if let Some(new_tag) = CvlDocBuilder::tag_from_content(&line.content) {
+                if !cur_desc.is_empty() {
+                    let desc = std::mem::take(&mut cur_desc);
+                    let doc_tag = DocumentationTag::new(cur_tag, desc.join_to_string(), cur_span);
 
-            if line.starts_with('@') {
-                if not_finished_with_previous_tag {
-                    let doc_tag =
-                        DocumentationTag::new(cur_tag.clone(), cur_desc.clone(), cur_span);
                     tags.push(doc_tag);
-
-                    cur_desc.clear();
                 }
 
-                let (tag, desc) = line.split_once(whitespace).unwrap_or_else(|| {
-                    //I'm not sure if it is an error to have a line that starts with @,
-                    //but has no (horizontal) whitespace. for now we accept this.
-
-                    //note that this condition includes newlines
-                    let last_non_whitespace =
-                        line.rfind(|c: char| !c.is_ascii_whitespace()).unwrap();
-                    line.split_at(last_non_whitespace)
-                });
-
-                cur_tag = tag.into();
-
-                cur_desc.push_str(desc);
-
+                line.content.drain(..new_tag.len() + 1);
+                cur_tag = new_tag;
                 cur_span = {
                     let start = line_span.start;
-                    let span = start..start + tag.chars().count();
+                    let span = start..start + cur_tag.len();
                     Some(converter.to_range(span))
                 };
-            } else {
-                //then it is a run-on description
-                if not_finished_with_previous_tag {
-                    cur_desc.push('\n');
-                }
-                cur_desc.push_str(line);
             }
+
+            line = line.trim(PADDING);
+            cur_desc.push(line);
         }
 
         // this check deals with the cases where the body was empty,
         // or contained only whitespace lines.
         // otherwise we are guaranteed to have an in-progress tag that should be pushed.
         if !cur_desc.is_empty() {
-            let doc_tag = DocumentationTag::new(cur_tag, cur_desc, cur_span);
+            let doc_tag = DocumentationTag::new(cur_tag, cur_desc.join_to_string(), cur_span);
             tags.push(doc_tag);
         }
 
         tags
     }
 }
 
-pub(super) fn split_starred_doc_lines(stream: Vec<char>, span: Span) -> Vec<(String, Span)> {
+pub(super) fn split_starred_doc_lines(
+    stream: Vec<char>,
+    span: Span,
+) -> Vec<(TerminatedLine, Span)> {
     let not_padding = |c: &char| !c.is_ascii_whitespace() && *c != '*';
+    static PADDING: &[char] = &[' ', '\t', '*'];
     let mut next_line_start = span.start;
 
     stream
         .split_inclusive(|&c| c == '\n')
         .map(|line| {
-            //we still update the start position
-            //even if the line is later skipped.
             let line_start = next_line_start;
             next_line_start += line.len();
 
             (line, line_start)
         })
-        .filter_map(|(line, line_start)| {
-            let trimmed_start = line.iter().position(not_padding)?;
-            let trimmed_end = line.iter().rposition(not_padding)?;
-
+        .map(|(line, line_start)| {
+            let trimmed_start = line.iter().position(not_padding).unwrap_or(0);
+            let trimmed_end = line.iter().rposition(not_padding).unwrap_or(line.len());
             let trimmed_span = (line_start + trimmed_start)..(line_start + trimmed_end);
-            let trimmed_line = line[trimmed_start..=trimmed_end].iter().collect();
 
-            Some((trimmed_line, trimmed_span))
+            let terminated_line = TerminatedLine::from_char_slice(line)
+                .trim_start(PADDING)
+                .trim_end(PADDING);
+
+            (terminated_line, trimmed_span)
         })
         .collect()
 }
diff --git a/src/parse/helpers.rs b/src/parse/helpers.rs
@@ -1,10 +1,11 @@
+use super::terminated_line::{TerminatedLine, Terminator};
 use chumsky::combinator::Repeated;
 use chumsky::prelude::*;
 use chumsky::primitive::OneOf;
 use std::hash::Hash;
 
 pub(super) fn newline<'src>() -> impl Parser<char, &'src str, Error = Simple<char>> + Clone {
-    const NEWLINE: &[&str; 2] = &["\r\n", "\n"];
+    static NEWLINE: &[&str; 2] = &["\r\n", "\n"];
     let newline_parsers = NEWLINE.map(just);
     choice(newline_parsers)
 }
@@ -14,8 +15,20 @@ pub(super) fn newline_or_end<'src>() -> impl Parser<char, &'src str, Error = Sim
     newline().or(end).boxed()
 }
 
+pub(super) fn line_with_terminator(
+) -> impl Parser<char, TerminatedLine, Error = Simple<char>> + Clone {
+    let terminator = choice([
+        just("\r\n").to(Terminator::CRLF).boxed(),
+        just('\n').to(Terminator::LF).boxed(),
+        just('\r').to(Terminator::CR).boxed(),
+        end().to(Terminator::EOF).boxed(),
+    ]);
+
+    take_until(terminator).map(|(content, terminator)| TerminatedLine::new(content, terminator))
+}
+
 pub(super) fn horizontal_ws<'src>() -> Repeated<OneOf<char, &'src [char; 2], Simple<char>>> {
-    const HORIZONTAL_WHITESPACE: &[char; 2] = &[' ', '\t'];
+    static HORIZONTAL_WHITESPACE: &[char; 2] = &[' ', '\t'];
     one_of(HORIZONTAL_WHITESPACE).repeated()
 }
 
@@ -32,7 +45,12 @@ where
 }
 
 pub(super) fn take_to_newline_or_end<'src>() -> BoxedParser<'src, char, Vec<char>, Simple<char>> {
-    take_until_without_terminator(newline_or_end()).boxed()
+    take_until(newline_or_end())
+        .map(|(mut content, line_end)| {
+            content.extend(line_end.chars());
+            content
+        })
+        .boxed()
 }
 
 pub(super) fn take_to_starred_terminator<'src>() -> BoxedParser<'src, char, Vec<char>, Simple<char>>
@@ -46,7 +64,7 @@ pub(super) fn single_line_cvl_comment() -> impl Parser<char, (), Error = Simple<
 
 pub(super) fn multi_line_cvl_comment() -> impl Parser<char, (), Error = Simple<char>> {
     //this is a somewhat tricky parse.
-    //we want to avoid parsing "/**" as a cvl comment, to give priority to starred natspec comments.
+    //we want to avoid parsing "/**" as a cvl comment, to give priority to starred cvldoc comments.
     //however, this creates an edge case.
     let edge_case_starter = just("/**/");
     let multi_line_starter = just("/*").then_ignore(none_of('*'));