From fc1339bc9034e8faee771d73b7109d109fcce42a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Sat, 9 Mar 2024 00:35:57 +0000 Subject: [PATCH] Handle str literals written with `'` lexed as lifetime Given `'hello world'` and `'1 str', provide a structured suggestion for a valid string literal: ``` error[E0762]: unterminated character literal --> $DIR/lex-bad-str-literal-as-char-3.rs:2:26 | LL | println!('hello world'); | ^^^^ | help: if you meant to write a `str` literal, use double quotes | LL | println!("hello world"); | ~ ~ ``` ``` error[E0762]: unterminated character literal --> $DIR/lex-bad-str-literal-as-char-1.rs:2:20 | LL | println!('1 + 1'); | ^^^^ | help: if you meant to write a `str` literal, use double quotes | LL | println!("1 + 1"); | ~ ~ ``` Fix #119685. --- compiler/rustc_lexer/src/cursor.rs | 2 +- compiler/rustc_parse/messages.ftl | 1 + compiler/rustc_parse/src/errors.rs | 11 +++++ compiler/rustc_parse/src/lexer/mod.rs | 46 +++++++++++++++++-- .../lexer/lex-bad-str-literal-as-char-1.fixed | 6 +++ .../ui/lexer/lex-bad-str-literal-as-char-1.rs | 6 +++ .../lex-bad-str-literal-as-char-1.stderr | 20 ++++++++ .../lexer/lex-bad-str-literal-as-char-2.fixed | 4 ++ .../ui/lexer/lex-bad-str-literal-as-char-2.rs | 4 ++ .../lex-bad-str-literal-as-char-2.stderr | 13 ++++++ .../lexer/lex-bad-str-literal-as-char-3.fixed | 4 ++ .../ui/lexer/lex-bad-str-literal-as-char-3.rs | 4 ++ .../lex-bad-str-literal-as-char-3.stderr | 14 ++++++ 13 files changed, 130 insertions(+), 5 deletions(-) create mode 100644 tests/ui/lexer/lex-bad-str-literal-as-char-1.fixed create mode 100644 tests/ui/lexer/lex-bad-str-literal-as-char-1.rs create mode 100644 tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr create mode 100644 tests/ui/lexer/lex-bad-str-literal-as-char-2.fixed create mode 100644 tests/ui/lexer/lex-bad-str-literal-as-char-2.rs create mode 100644 tests/ui/lexer/lex-bad-str-literal-as-char-2.stderr create mode 100644 tests/ui/lexer/lex-bad-str-literal-as-char-3.fixed create mode 100644 tests/ui/lexer/lex-bad-str-literal-as-char-3.rs create mode 100644 tests/ui/lexer/lex-bad-str-literal-as-char-3.stderr diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs index aba7f95487e9d..8d8cc9ecc3e9f 100644 --- a/compiler/rustc_lexer/src/cursor.rs +++ b/compiler/rustc_lexer/src/cursor.rs @@ -46,7 +46,7 @@ impl<'a> Cursor<'a> { /// If requested position doesn't exist, `EOF_CHAR` is returned. /// However, getting `EOF_CHAR` doesn't always mean actual end of file, /// it should be checked with `is_eof` method. - pub(crate) fn first(&self) -> char { + pub fn first(&self) -> char { // `.next()` optimizes better than `.nth(0)` self.chars.clone().next().unwrap_or(EOF_CHAR) } diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index 60cc138fd7bc2..cf61dced90729 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -839,6 +839,7 @@ parse_unknown_prefix = prefix `{$prefix}` is unknown .label = unknown prefix .note = prefixed identifiers and literals are reserved since Rust 2021 .suggestion_br = use `br` for a raw byte string + .suggestion_str = if you meant to write a `str` literal, use double quotes .suggestion_whitespace = consider inserting whitespace here parse_unknown_start_of_token = unknown start of token: {$escaped} diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index c72b7e2cfa727..db1b477b91070 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -1994,6 +1994,17 @@ pub enum UnknownPrefixSugg { style = "verbose" )] Whitespace(#[primary_span] Span), + #[multipart_suggestion( + parse_suggestion_str, + applicability = "maybe-incorrect", + style = "verbose" + )] + MeantStr { + #[suggestion_part(code = "\"")] + start: Span, + #[suggestion_part(code = "\"")] + end: Span, + }, } #[derive(Diagnostic)] diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index f57945a52df37..659e0b63d2ed8 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -63,6 +63,7 @@ pub(crate) fn parse_token_trees<'psess, 'src>( cursor, override_span, nbsp_is_whitespace: false, + last_lifetime: None, }; let (stream, res, unmatched_delims) = tokentrees::TokenTreesReader::parse_all_token_trees(string_reader); @@ -105,6 +106,10 @@ struct StringReader<'psess, 'src> { /// in this file, it's safe to treat further occurrences of the non-breaking /// space character as whitespace. nbsp_is_whitespace: bool, + + /// Track the `Span` for the leading `'` of the last lifetime. Used for + /// diagnostics to detect possible typo where `"` was meant. + last_lifetime: Option, } impl<'psess, 'src> StringReader<'psess, 'src> { @@ -130,6 +135,18 @@ impl<'psess, 'src> StringReader<'psess, 'src> { debug!("next_token: {:?}({:?})", token.kind, self.str_from(start)); + if let rustc_lexer::TokenKind::Semi + | rustc_lexer::TokenKind::LineComment { .. } + | rustc_lexer::TokenKind::BlockComment { .. } + | rustc_lexer::TokenKind::CloseParen + | rustc_lexer::TokenKind::CloseBrace + | rustc_lexer::TokenKind::CloseBracket = token.kind + { + // Heuristic: we assume that it is unlikely we're dealing with an unterminated + // string surrounded by single quotes. + self.last_lifetime = None; + } + // Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a // rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs // additional validation. @@ -247,6 +264,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { // expansion purposes. See #12512 for the gory details of why // this is necessary. let lifetime_name = self.str_from(start); + self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); if starts_with_number { let span = self.mk_sp(start, self.pos); self.dcx().struct_err("lifetimes cannot start with a number") @@ -395,10 +413,21 @@ impl<'psess, 'src> StringReader<'psess, 'src> { match kind { rustc_lexer::LiteralKind::Char { terminated } => { if !terminated { - self.dcx() + let mut err = self + .dcx() .struct_span_fatal(self.mk_sp(start, end), "unterminated character literal") - .with_code(E0762) - .emit() + .with_code(E0762); + if let Some(lt_sp) = self.last_lifetime { + err.multipart_suggestion( + "if you meant to write a `str` literal, use double quotes", + vec![ + (lt_sp, "\"".to_string()), + (self.mk_sp(start, start + BytePos(1)), "\"".to_string()), + ], + Applicability::MaybeIncorrect, + ); + } + err.emit() } self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' ' } @@ -673,7 +702,16 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let sugg = if prefix == "rb" { Some(errors::UnknownPrefixSugg::UseBr(prefix_span)) } else if expn_data.is_root() { - Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi())) + if self.cursor.first() == '\'' + && let Some(start) = self.last_lifetime + { + Some(errors::UnknownPrefixSugg::MeantStr { + start, + end: self.mk_sp(self.pos, self.pos + BytePos(1)), + }) + } else { + Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi())) + } } else { None }; diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-1.fixed b/tests/ui/lexer/lex-bad-str-literal-as-char-1.fixed new file mode 100644 index 0000000000000..b12139b0b40e9 --- /dev/null +++ b/tests/ui/lexer/lex-bad-str-literal-as-char-1.fixed @@ -0,0 +1,6 @@ +//@ run-rustfix +fn main() { + println!("1 + 1"); + //~^ ERROR unterminated character literal + //~| ERROR lifetimes cannot start with a number +} diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-1.rs b/tests/ui/lexer/lex-bad-str-literal-as-char-1.rs new file mode 100644 index 0000000000000..6548792f33b4f --- /dev/null +++ b/tests/ui/lexer/lex-bad-str-literal-as-char-1.rs @@ -0,0 +1,6 @@ +//@ run-rustfix +fn main() { + println!('1 + 1'); + //~^ ERROR unterminated character literal + //~| ERROR lifetimes cannot start with a number +} diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr b/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr new file mode 100644 index 0000000000000..675624cfa9415 --- /dev/null +++ b/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr @@ -0,0 +1,20 @@ +error[E0762]: unterminated character literal + --> $DIR/lex-bad-str-literal-as-char-1.rs:3:20 + | +LL | println!('1 + 1'); + | ^^^ + | +help: if you meant to write a `str` literal, use double quotes + | +LL | println!("1 + 1"); + | ~ ~ + +error: lifetimes cannot start with a number + --> $DIR/lex-bad-str-literal-as-char-1.rs:3:14 + | +LL | println!('1 + 1'); + | ^^ + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0762`. diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-2.fixed b/tests/ui/lexer/lex-bad-str-literal-as-char-2.fixed new file mode 100644 index 0000000000000..3ccec537c6c34 --- /dev/null +++ b/tests/ui/lexer/lex-bad-str-literal-as-char-2.fixed @@ -0,0 +1,4 @@ +//@ run-rustfix +fn main() { + println!(" 1 + 1"); //~ ERROR character literal may only contain one codepoint +} diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-2.rs b/tests/ui/lexer/lex-bad-str-literal-as-char-2.rs new file mode 100644 index 0000000000000..8af72e47dbb4b --- /dev/null +++ b/tests/ui/lexer/lex-bad-str-literal-as-char-2.rs @@ -0,0 +1,4 @@ +//@ run-rustfix +fn main() { + println!(' 1 + 1'); //~ ERROR character literal may only contain one codepoint +} diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-2.stderr b/tests/ui/lexer/lex-bad-str-literal-as-char-2.stderr new file mode 100644 index 0000000000000..8445d0595f3ed --- /dev/null +++ b/tests/ui/lexer/lex-bad-str-literal-as-char-2.stderr @@ -0,0 +1,13 @@ +error: character literal may only contain one codepoint + --> $DIR/lex-bad-str-literal-as-char-2.rs:3:14 + | +LL | println!(' 1 + 1'); + | ^^^^^^^^ + | +help: if you meant to write a `str` literal, use double quotes + | +LL | println!(" 1 + 1"); + | ~~~~~~~~ + +error: aborting due to 1 previous error + diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-3.fixed b/tests/ui/lexer/lex-bad-str-literal-as-char-3.fixed new file mode 100644 index 0000000000000..3fbe0ea1dabef --- /dev/null +++ b/tests/ui/lexer/lex-bad-str-literal-as-char-3.fixed @@ -0,0 +1,4 @@ +//@ run-rustfix +fn main() { + println!("hello world"); //~ ERROR unterminated character literal +} diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-3.rs b/tests/ui/lexer/lex-bad-str-literal-as-char-3.rs new file mode 100644 index 0000000000000..289f3f1d6570f --- /dev/null +++ b/tests/ui/lexer/lex-bad-str-literal-as-char-3.rs @@ -0,0 +1,4 @@ +//@ run-rustfix +fn main() { + println!('hello world'); //~ ERROR unterminated character literal +} diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-3.stderr b/tests/ui/lexer/lex-bad-str-literal-as-char-3.stderr new file mode 100644 index 0000000000000..ebfbeac02600b --- /dev/null +++ b/tests/ui/lexer/lex-bad-str-literal-as-char-3.stderr @@ -0,0 +1,14 @@ +error[E0762]: unterminated character literal + --> $DIR/lex-bad-str-literal-as-char-3.rs:3:26 + | +LL | println!('hello world'); + | ^^^^ + | +help: if you meant to write a `str` literal, use double quotes + | +LL | println!("hello world"); + | ~ ~ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0762`.