Skip to content

Improve error messages for raw strings (#60762) #70522

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 1, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Cleanup error messages, improve docstrings
  • Loading branch information
rcoh committed Mar 29, 2020
commit c15f86b4b35a260b105dc472fc6e3556af8a8db0
2 changes: 1 addition & 1 deletion src/librustc_lexer/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ impl<'a> Cursor<'a> {
/// If requested position doesn't exist, `EOF_CHAR` is returned.
/// However, getting `EOF_CHAR` doesn't always mean actual end of file,
/// it should be checked with `is_eof` method.
pub(crate) fn nth_char(&self, n: usize) -> char {
fn nth_char(&self, n: usize) -> char {
self.chars().nth(n).unwrap_or(EOF_CHAR)
}

Expand Down
47 changes: 31 additions & 16 deletions src/librustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,25 +141,41 @@ pub enum LiteralKind {
RawByteStr(UnvalidatedRawStr),
}

/// Represents something that looks like a raw string, but may have some
/// problems. Use `.validate()` to convert it into something
/// usable.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct UnvalidatedRawStr {
/// The prefix (`r###"`) is valid
valid_start: bool,
/// The number of leading `#`
n_start_hashes: usize,
/// The number of trailing `#`. `n_end_hashes` <= `n_start_hashes`
n_end_hashes: usize,
/// The offset starting at `r` or `br` where the user may have intended to end the string.
/// Currently, it is the longest sequence of pattern `"#+"`.
possible_terminator_offset: Option<usize>,
}

/// Error produced validating a raw string. Represents cases like:
/// - `r##~"abcde"##`: `LexRawStrError::InvalidStarter`
/// - `r###"abcde"##`: `LexRawStrError::NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11) }`
/// - Too many `#`s (>65535): `TooManyDelimiters`
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LexRawStrError {
/// Non # characters between `r` and `"` eg. `r#~"..`
/// Non `#` characters exist between `r` and `"` eg. `r#~"..`
InvalidStarter,
/// The string was never terminated. `possible_terminator_offset` is the best guess of where they
/// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
/// may have intended to terminate it.
NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
/// More than 65536 # signs
/// More than 65535 `#`s exist.
TooManyDelimiters,
}

/// Raw String that contains a valid prefix (`#+"`) and postfix (`"#+`) where
/// there are a matching number of `#` characters in both. Note that this will
/// not consume extra trailing `#` characters: `r###"abcde"####` is lexed as a
/// `ValidatedRawStr { n_hashes: 3 }` followed by a `#` token.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub struct ValidatedRawStr {
n_hashes: u16,
Expand All @@ -172,27 +188,26 @@ impl ValidatedRawStr {
}

impl UnvalidatedRawStr {
pub fn started(&self) -> bool {
self.valid_start
}

pub fn validate(self) -> Result<ValidatedRawStr, LexRawStrError> {
if !self.valid_start {
return Err(LexRawStrError::InvalidStarter);
}

// Only up to 65535 `#`s are allowed in raw strings
let n_start_safe: u16 =
self.n_start_hashes.try_into().map_err(|_| LexRawStrError::TooManyDelimiters)?;
match (self.n_start_hashes, self.n_end_hashes) {
(n_start, n_end) if n_start > n_end => Err(LexRawStrError::NoTerminator {
expected: n_start,

if self.n_start_hashes > self.n_end_hashes {
Err(LexRawStrError::NoTerminator {
expected: self.n_start_hashes,
found: self.n_end_hashes,
possible_terminator_offset: self.possible_terminator_offset,
}),
(n_start, n_end) => {
debug_assert_eq!(n_start, n_end);
Ok(ValidatedRawStr { n_hashes: n_start_safe })
}
})
} else {
// Since the lexer should never produce a literal with n_end > n_start, if n_start <= n_end,
// they must be equal.
debug_assert_eq!(self.n_start_hashes, self.n_end_hashes);
Ok(ValidatedRawStr { n_hashes: n_start_safe })
}
}
}
Expand Down Expand Up @@ -656,7 +671,7 @@ impl Cursor<'_> {
false
}

/// Eats the double-quoted string an UnvalidatedRawStr
/// Eats the double-quoted string and returns an `UnvalidatedRawStr`.
fn raw_double_quoted_string(&mut self, prefix_len: usize) -> UnvalidatedRawStr {
debug_assert!(self.prev() == 'r');
let mut valid_start: bool = false;
Expand Down
9 changes: 4 additions & 5 deletions src/librustc_parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -533,13 +533,12 @@ impl<'a> StringReader<'a> {
}

if let Some(possible_offset) = possible_offset {
let span = self.mk_sp(
start + BytePos(possible_offset as u32),
start + BytePos(possible_offset as u32) + BytePos(found_terminators as u32),
);
let lo = start + BytePos(possible_offset as u32);
let hi = lo + BytePos(found_terminators as u32);
let span = self.mk_sp(lo, hi);
err.span_suggestion(
span,
"you might have intended to terminate the string here",
"consider terminating the string here",
"#".repeat(n_hashes),
Applicability::MaybeIncorrect,
);
Expand Down
1 change: 1 addition & 0 deletions src/librustc_parse/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#![feature(crate_visibility_modifier)]
#![feature(bindings_after_at)]
#![feature(try_blocks)]
#![feature(or_patterns)]

use rustc_ast::ast;
use rustc_ast::token::{self, Nonterminal};
Expand Down
11 changes: 7 additions & 4 deletions src/librustc_parse/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,12 @@ impl<'a> Parser<'a> {

fn check_too_many_raw_str_terminators(&mut self, err: &mut DiagnosticBuilder<'_>) -> bool {
let prev_token_raw_str = match self.prev_token {
Token { kind: TokenKind::Literal(Lit { kind: LitKind::StrRaw(n), .. }), .. } => Some(n),
Token {
kind: TokenKind::Literal(Lit { kind: LitKind::ByteStrRaw(n), .. }), ..
kind:
TokenKind::Literal(Lit {
kind: LitKind::StrRaw(n) | LitKind::ByteStrRaw(n), ..
}),
..
} => Some(n),
_ => None,
};
Expand All @@ -300,11 +303,11 @@ impl<'a> Parser<'a> {
err.set_primary_message("too many `#` when terminating raw string");
err.span_suggestion(
self.token.span,
"Remove the extra `#`",
"remove the extra `#`",
String::new(),
Applicability::MachineApplicable,
);
err.note(&format!("The raw string started with {} `#`s", n_hashes));
err.note(&format!("the raw string started with {} `#`s", n_hashes));
return true;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/test/ui/parser/raw/raw-byte-string-eof.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ error[E0748]: unterminated raw string
--> $DIR/raw-byte-string-eof.rs:2:5
|
LL | br##"a"#;
| ^ - help: you might have intended to terminate the string here: `##`
| ^ - help: consider terminating the string here: `##`
| |
| unterminated raw string
|
Expand Down
4 changes: 2 additions & 2 deletions src/test/ui/parser/raw/raw-str-unbalanced.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ error: too many `#` when terminating raw string
--> $DIR/raw-str-unbalanced.rs:3:9
|
LL | "##
| ^ help: Remove the extra `#`
| ^ help: remove the extra `#`
|
= note: The raw string started with 1 `#`s
= note: the raw string started with 1 `#`s

error: aborting due to previous error

2 changes: 1 addition & 1 deletion src/test/ui/parser/raw/raw_string.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ error[E0748]: unterminated raw string
--> $DIR/raw_string.rs:2:13
|
LL | let x = r##"lol"#;
| ^ - help: you might have intended to terminate the string here: `##`
| ^ - help: consider terminating the string here: `##`
| |
| unterminated raw string
|
Expand Down