Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 28 additions & 15 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,24 +331,37 @@ pub fn is_whitespace(c: char) -> bool {

matches!(
c,
// Usual ASCII suspects
'\u{0009}' // \t
| '\u{000A}' // \n
// End-of-line characters
| '\u{000A}' // line feed (\n)
| '\u{000B}' // vertical tab
| '\u{000C}' // form feed
| '\u{000D}' // \r
| '\u{0020}' // space

// NEXT LINE from latin1
| '\u{0085}'
| '\u{000D}' // carriage return (\r)
| '\u{0085}' // next line (from latin1)
| '\u{2028}' // LINE SEPARATOR
| '\u{2029}' // PARAGRAPH SEPARATOR

// Bidi markers
// `Default_Ignorable_Code_Point` characters
| '\u{200E}' // LEFT-TO-RIGHT MARK
| '\u{200F}' // RIGHT-TO-LEFT MARK

// Dedicated whitespace characters from Unicode
| '\u{2028}' // LINE SEPARATOR
| '\u{2029}' // PARAGRAPH SEPARATOR
// Horizontal space characters
| '\u{0009}' // tab (\t)
| '\u{0020}' // space
)
}

/// True if `c` is considered horizontal whitespace according to Rust language definition.
///
/// Only two characters qualify: tab (U+0009) and space (U+0020). Every other
/// character, including the end-of-line characters and the left-to-right /
/// right-to-left marks, yields `false`.
pub fn is_horizontal_whitespace(c: char) -> bool {
    // Unlike `is_whitespace`, this is NOT the whole Pattern_White_Space set:
    // it is only the part of that set which keeps the cursor on the current
    // line (tab and space). Callers use it where a line terminator must not
    // be skipped over as if it were padding.
    //
    // Both code points are ASCII, so the set does not change with different
    // Unicode versions and it is fine to hard-code the values.
    matches!(c, '\t' | ' ')
}

Expand Down Expand Up @@ -538,7 +551,7 @@ impl Cursor<'_> {
debug_assert!(length_opening >= 3);

// whitespace between the opening and the infostring.
self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));

// copied from `eat_identifier`, but allows `-` and `.` in infostring to allow something like
// `---Cargo.toml` as a valid opener
Expand All @@ -547,7 +560,7 @@ impl Cursor<'_> {
self.eat_while(|c| is_id_continue(c) || c == '-' || c == '.');
}

self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
let invalid_infostring = self.first() != '\n';

let mut found = false;
Expand Down Expand Up @@ -588,7 +601,7 @@ impl Cursor<'_> {
// on a standalone line. Might be wrong.
while let Some(closing) = rest.find("---") {
let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
if rest[preceding_chars_start..closing].chars().all(is_whitespace) {
if rest[preceding_chars_start..closing].chars().all(is_horizontal_whitespace) {
// candidate found
potential_closing = Some(closing);
break;
Expand Down
6 changes: 3 additions & 3 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_contr
use rustc_errors::codes::*;
use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
use rustc_lexer::{
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_horizontal_whitespace,
};
use rustc_literal_escaper::{EscapeError, Mode, check_for_errors};
use rustc_session::lint::BuiltinLintDiag;
Expand Down Expand Up @@ -597,7 +597,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {

let last_line_start = within.rfind('\n').map_or(0, |i| i + 1);
let last_line = &within[last_line_start..];
let last_line_trimmed = last_line.trim_start_matches(is_whitespace);
let last_line_trimmed = last_line.trim_start_matches(is_horizontal_whitespace);
let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32);

let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos);
Expand Down Expand Up @@ -640,7 +640,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
});
}

if !rest.trim_matches(is_whitespace).is_empty() {
if !rest.trim_matches(is_horizontal_whitespace).is_empty() {
let span = self.mk_sp(last_line_start_pos, self.pos);
self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span });
}
Expand Down
1 change: 1 addition & 0 deletions tests/ui/.gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ json-bom-plus-crlf.rs -text
json-bom-plus-crlf-multifile.rs -text
json-bom-plus-crlf-multifile-aux.rs -text
trailing-carriage-return-in-string.rs -text
frontmatter-crlf.rs -text
*.bin -text
22 changes: 22 additions & 0 deletions tests/ui/frontmatter/frontmatter-contains-whitespace.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env -S cargo -Zscript
---cargo
# Beware editing: it has numerous whitespace characters which are important.
# It contains characters from the ranges of the 'PATTERN_WHITE_SPACE' property outlined in
# https://unicode.org/Public/UNIDATA/PropList.txt
#
# The characters in the first expression of the assertion can be generated
# from: "4\u{0C}+\n\t\r7\t*\u{20}2\u{85}/\u{200E}3\u{200F}*\u{2028}2\u{2029}"
package.description = """
4 +

7 * 2…/‎3‏*
2
"""
---

//@ check-pass

// Ensure the frontmatter can contain any whitespace

#![feature(frontmatter)]

fn main() {}
14 changes: 14 additions & 0 deletions tests/ui/frontmatter/frontmatter-crlf.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env -S cargo -Zscript
---
[dependencies]
clap = "4"
---

//@ check-pass
// ignore-tidy-cr

// crlf line endings should be accepted

#![feature(frontmatter)]

fn main() {}
5 changes: 3 additions & 2 deletions tests/ui/frontmatter/frontmatter-whitespace-3.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@


---cargo
---
---cargo
---

// please note the whitespace characters after the first four lines.
// This ensures that we accept whitespaces before the frontmatter, after
Expand All @@ -10,6 +10,7 @@
//@ check-pass
// ignore-tidy-end-whitespace
// ignore-tidy-leading-newlines
// ignore-tidy-tab

#![feature(frontmatter)]

Expand Down
3 changes: 2 additions & 1 deletion tests/ui/frontmatter/frontmatter-whitespace-4.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
--- cargo
--- cargo
---

//@ check-pass
// ignore-tidy-tab
// A frontmatter infostring can have leading whitespace.

#![feature(frontmatter)]
Expand Down
Loading