Skip to content

Preserve line endings when parsing #21

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Sep 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# Changelog
## [0.4.1] - 2022-09-28
### Fixed
- The kind of line terminator is no longer ignored, so CRLF line endings are now parsed correctly.

## [0.4.0] - 2022-09-21
### Changed
- Changed all references from "NatSpec" to the new name, "CVLDoc"
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "cvldoc_parser"
name = "cvldoc_parser_core"
version = "0.4.0"
edition = "2021"

Expand Down
21 changes: 11 additions & 10 deletions src/diagnostics.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{AssociatedElement, CvlDoc, Tag, DocData};
use crate::{AssociatedElement, CvlDoc, DocData, Tag};
use lsp_types::{Diagnostic, DiagnosticSeverity, Range};

impl AssociatedElement {
Expand Down Expand Up @@ -42,15 +42,16 @@ impl CvlDoc {
const WARNING: DiagnosticSeverity = DiagnosticSeverity::WARNING;
const ERROR: DiagnosticSeverity = DiagnosticSeverity::ERROR;

let mut add = |message: String, diag_range: Option<Range>, severity: DiagnosticSeverity| {
let diag = Diagnostic {
range: diag_range.unwrap_or(self.range),
severity: Some(severity),
message,
..Default::default()
let mut add =
|message: String, diag_range: Option<Range>, severity: DiagnosticSeverity| {
let diag = Diagnostic {
range: diag_range.unwrap_or(self.range),
severity: Some(severity),
message,
..Default::default()
};
warnings.push(diag);
};
warnings.push(diag);
};

if let Some(associated) = associated {
if tags.iter().all(|tag| tag.kind != Tag::Notice) {
Expand Down Expand Up @@ -80,7 +81,7 @@ impl CvlDoc {
}
}
} else {
let error_desc = "no associated element for NatSpec documentation block".into();
let error_desc = "no associated element for CVLDoc documentation block".into();
add(error_desc, None, ERROR);
}

Expand Down
30 changes: 30 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,18 @@ impl Tag {
_ => None,
}
}

/// Returns how many characters this tag occupies in the source text,
/// including the leading `@`.
///
/// The result is measured in `char`s, not bytes: line contents in this
/// crate are handled as slices of `char`, and the tag length is used as an
/// offset into them and into character-based spans.
pub(crate) fn len(&self) -> usize {
    let len_without_ampersat = match self {
        Tag::Dev => 3,
        Tag::Title | Tag::Param => 5,
        Tag::Notice | Tag::Return => 6,
        Tag::Formula => 7,
        // Count chars rather than bytes, so that an unexpected tag that
        // contains non-ASCII characters still yields a valid char offset.
        Tag::Unexpected(s) => s.chars().count(),
    };

    // account for the '@' that precedes the tag name
    len_without_ampersat + 1
}
}

impl From<&str> for Tag {
Expand All @@ -219,6 +231,24 @@ impl From<&str> for Tag {
}
}

impl From<String> for Tag {
    /// Converts an owned tag name into a [`Tag`].
    ///
    /// A single leading `@` is ignored if present. Any name that is not one
    /// of the known tags becomes `Tag::Unexpected`, holding the name without
    /// that leading `@`.
    fn from(s: String) -> Self {
        // drop at most one leading '@'; anything after it is the tag name
        let name = s.strip_prefix('@').unwrap_or(s.as_str());

        match name {
            "dev" => Tag::Dev,
            "title" => Tag::Title,
            "param" => Tag::Param,
            "notice" => Tag::Notice,
            "return" => Tag::Return,
            "formula" => Tag::Formula,
            unknown => Tag::Unexpected(unknown.to_owned()),
        }
    }
}

impl Display for AssociatedElement {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let kind = match self {
Expand Down
4 changes: 2 additions & 2 deletions src/parse/associated_element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,10 @@ fn invariant_decl<'src>() -> BoxedParser<'src, char, AssociatedElement, Simple<c
"using",
]
.map(just);
let new_natspec_start = just("///").or(just("/**").then_ignore(none_of('/')));
let new_cvldoc_start = just("///").or(just("/**").then_ignore(none_of('/')));
let end_at_stopping_word = end()
.or(choice(stopping).ignored())
.or(new_natspec_start.ignored())
.or(new_cvldoc_start.ignored())
.to((None, None))
.rewind();

Expand Down
97 changes: 47 additions & 50 deletions src/parse/builder.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::util::span_to_range::{RangeConverter, Span, Spanned};
use super::terminated_line::TerminatedLine;
use crate::parse::terminated_line::JoinToString;
use crate::util::span_to_range::{RangeConverter, Span};
use crate::{AssociatedElement, CvlDoc, DocData, DocumentationTag, Tag};
use color_eyre::eyre::{bail, ensure, eyre};
use color_eyre::Report;
Expand All @@ -11,7 +13,7 @@ pub enum CvlDocBuilder {
span: Span,
},
Documentation {
spanned_body: Vec<Spanned<String>>,
spanned_body: Vec<(TerminatedLine, Span)>,
associated: Option<AssociatedElement>,
span: Span,
},
Expand Down Expand Up @@ -43,7 +45,7 @@ impl CvlDocBuilder {
span,
} => {
ensure!(!spanned_body.is_empty(), "documentation has no body");
let tags = CvlDocBuilder::process_doc_body(&spanned_body, converter.clone());
let tags = CvlDocBuilder::process_doc_body(spanned_body, converter.clone());

let cvl_doc = CvlDoc {
raw: CvlDocBuilder::raw_data(src, span.clone())?,
Expand All @@ -59,97 +61,92 @@ impl CvlDocBuilder {
}
}

fn tag_from_content(content: &[char]) -> Option<Tag> {
let mut content_chars = content.iter().copied();

match content_chars.next() {
Some('@') => {
let tag_literal: String = content_chars
.take_while(|c| !c.is_ascii_whitespace())
.collect();
Some(tag_literal.into())
}
_ => None,
}
}

fn process_doc_body(
spanned_body: &[(String, Span)],
spanned_body: Vec<(TerminatedLine, Span)>,
converter: RangeConverter,
) -> Vec<DocumentationTag> {
let mut tags = Vec::new();

let mut cur_tag = Tag::default();
let mut cur_desc = String::new();
let mut cur_desc: Vec<TerminatedLine> = Vec::new();
let mut cur_span = None;

let whitespace = &[' ', '\t'];

for (line, line_span) in spanned_body {
let line = line.trim();
if line.is_empty() {
continue;
}
static PADDING: &[char] = &[' ', '\t'];

let not_finished_with_previous_tag = !cur_desc.is_empty();
for (mut line, line_span) in spanned_body.into_iter() {
if let Some(new_tag) = CvlDocBuilder::tag_from_content(&line.content) {
if !cur_desc.is_empty() {
let desc = std::mem::take(&mut cur_desc);
let doc_tag = DocumentationTag::new(cur_tag, desc.join_to_string(), cur_span);

if line.starts_with('@') {
if not_finished_with_previous_tag {
let doc_tag =
DocumentationTag::new(cur_tag.clone(), cur_desc.clone(), cur_span);
tags.push(doc_tag);

cur_desc.clear();
}

let (tag, desc) = line.split_once(whitespace).unwrap_or_else(|| {
//I'm not sure if it is an error to have a line that starts with @,
//but has no (horizontal) whitespace. for now we accept this.

//note that this condition includes newlines
let last_non_whitespace =
line.rfind(|c: char| !c.is_ascii_whitespace()).unwrap();
line.split_at(last_non_whitespace)
});

cur_tag = tag.into();

cur_desc.push_str(desc);

line.content.drain(..new_tag.len() + 1);
cur_tag = new_tag;
cur_span = {
let start = line_span.start;
let span = start..start + tag.chars().count();
let span = start..start + cur_tag.len();
Some(converter.to_range(span))
};
} else {
//then it is a run-on description
if not_finished_with_previous_tag {
cur_desc.push('\n');
}
cur_desc.push_str(line);
}

line = line.trim(PADDING);
cur_desc.push(line);
}

// this check deals with the cases where the body was empty,
// or contained only whitespace lines.
// otherwise we are guaranteed to have an in-progress tag that should be pushed.
if !cur_desc.is_empty() {
let doc_tag = DocumentationTag::new(cur_tag, cur_desc, cur_span);
let doc_tag = DocumentationTag::new(cur_tag, cur_desc.join_to_string(), cur_span);
tags.push(doc_tag);
}

tags
}
}

pub(super) fn split_starred_doc_lines(stream: Vec<char>, span: Span) -> Vec<(String, Span)> {
pub(super) fn split_starred_doc_lines(
stream: Vec<char>,
span: Span,
) -> Vec<(TerminatedLine, Span)> {
let not_padding = |c: &char| !c.is_ascii_whitespace() && *c != '*';
static PADDING: &[char] = &[' ', '\t', '*'];
let mut next_line_start = span.start;

stream
.split_inclusive(|&c| c == '\n')
.map(|line| {
//we still update the start position
//even if the line is later skipped.
let line_start = next_line_start;
next_line_start += line.len();

(line, line_start)
})
.filter_map(|(line, line_start)| {
let trimmed_start = line.iter().position(not_padding)?;
let trimmed_end = line.iter().rposition(not_padding)?;

.map(|(line, line_start)| {
let trimmed_start = line.iter().position(not_padding).unwrap_or(0);
let trimmed_end = line.iter().rposition(not_padding).unwrap_or(line.len());
let trimmed_span = (line_start + trimmed_start)..(line_start + trimmed_end);
let trimmed_line = line[trimmed_start..=trimmed_end].iter().collect();

Some((trimmed_line, trimmed_span))
let terminated_line = TerminatedLine::from_char_slice(line)
.trim_start(PADDING)
.trim_end(PADDING);

(terminated_line, trimmed_span)
})
.collect()
}
26 changes: 22 additions & 4 deletions src/parse/helpers.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use super::terminated_line::{TerminatedLine, Terminator};
use chumsky::combinator::Repeated;
use chumsky::prelude::*;
use chumsky::primitive::OneOf;
use std::hash::Hash;

pub(super) fn newline<'src>() -> impl Parser<char, &'src str, Error = Simple<char>> + Clone {
const NEWLINE: &[&str; 2] = &["\r\n", "\n"];
static NEWLINE: &[&str; 2] = &["\r\n", "\n"];
let newline_parsers = NEWLINE.map(just);
choice(newline_parsers)
}
Expand All @@ -14,8 +15,20 @@ pub(super) fn newline_or_end<'src>() -> impl Parser<char, &'src str, Error = Sim
newline().or(end).boxed()
}

/// Builds a parser that consumes input up to and including the next line
/// terminator, and packages the consumed content together with the kind of
/// terminator that ended it into a `TerminatedLine`.
///
/// The recognized terminators are `"\r\n"`, `'\n'`, `'\r'`, and end of input.
pub(super) fn line_with_terminator(
) -> impl Parser<char, TerminatedLine, Error = Simple<char>> + Clone {
    // "\r\n" is listed ahead of the lone '\n' and '\r' alternatives,
    // so a CRLF pair is matched as a single terminator.
    let crlf = just("\r\n").to(Terminator::CRLF).boxed();
    let lf = just('\n').to(Terminator::LF).boxed();
    let cr = just('\r').to(Terminator::CR).boxed();
    let eof = end().to(Terminator::EOF).boxed();
    let line_end = choice([crlf, lf, cr, eof]);

    take_until(line_end).map(|(body, ending)| TerminatedLine::new(body, ending))
}

pub(super) fn horizontal_ws<'src>() -> Repeated<OneOf<char, &'src [char; 2], Simple<char>>> {
const HORIZONTAL_WHITESPACE: &[char; 2] = &[' ', '\t'];
static HORIZONTAL_WHITESPACE: &[char; 2] = &[' ', '\t'];
one_of(HORIZONTAL_WHITESPACE).repeated()
}

Expand All @@ -32,7 +45,12 @@ where
}

pub(super) fn take_to_newline_or_end<'src>() -> BoxedParser<'src, char, Vec<char>, Simple<char>> {
take_until_without_terminator(newline_or_end()).boxed()
take_until(newline_or_end())
.map(|(mut content, line_end)| {
content.extend(line_end.chars());
content
})
.boxed()
}

pub(super) fn take_to_starred_terminator<'src>() -> BoxedParser<'src, char, Vec<char>, Simple<char>>
Expand All @@ -46,7 +64,7 @@ pub(super) fn single_line_cvl_comment() -> impl Parser<char, (), Error = Simple<

pub(super) fn multi_line_cvl_comment() -> impl Parser<char, (), Error = Simple<char>> {
//this is a somewhat tricky parse.
//we want to avoid parsing "/**" as a cvl comment, to give priority to starred natspec comments.
//we want to avoid parsing "/**" as a cvl comment, to give priority to starred cvldoc comments.
//however, this creates an edge case.
let edge_case_starter = just("/**/");
let multi_line_starter = just("/*").then_ignore(none_of('*'));
Expand Down
Loading