// Contents of `src/whitespace.rs`, the module introduced by the change
// "Match rustc's new shebang handling".
//
// Call site (src/lib.rs, `parse_file`): when the input starts with `#!`, the
// caller runs `whitespace::skip(&content[2..])` and treats the first line as a
// shebang unless the returned remainder starts with `[`. In that case the
// `#!` opens an inner attribute such as `#![feature(..)]`.

/// Strips leading whitespace and non-doc comments from `s`.
///
/// Returns the suffix of `s` beginning at the first character that is neither
/// whitespace nor part of an ordinary comment. The following are skipped:
///
/// * whitespace characters, see [`is_whitespace`];
/// * line comments `// ...`, including `////...`, up to and including the
///   terminating `\n` (or the end of input);
/// * block comments `/* ... */`, including `/**/` and `/*** ... */`, with
///   nesting.
///
/// Doc comments (`///`, `//!`, `/** ... */`, `/*! ... */`) are *not* skipped,
/// and neither is an unterminated block comment: in both cases the returned
/// slice starts at the comment.
pub fn skip(mut s: &str) -> &str {
    while let Some(&byte) = s.as_bytes().first() {
        if byte == b'/' {
            if s.starts_with("//")
                && (!s.starts_with("///") || s.starts_with("////"))
                && !s.starts_with("//!")
            {
                // Ordinary line comment: drop everything through the newline.
                match s.find('\n') {
                    Some(i) => {
                        s = &s[i + 1..];
                        continue;
                    }
                    None => return "",
                }
            } else if s.starts_with("/**/") {
                // Empty block comment; would otherwise look like a `/**` doc comment.
                s = &s[4..];
                continue;
            } else if s.starts_with("/*")
                && (!s.starts_with("/**") || s.starts_with("/***"))
                && !s.starts_with("/*!")
            {
                match block_comment_len(s) {
                    Some(len) => {
                        s = &s[len..];
                        continue;
                    }
                    // Unterminated comment: leave it in place for the caller.
                    None => return s,
                }
            }
        }

        match byte {
            // ASCII whitespace: space, \t, \n, vertical tab, form feed, \r.
            b' ' | 0x09..=0x0d => {
                s = &s[1..];
                continue;
            }
            // Any other ASCII byte is real content.
            b if b <= 0x7f => {}
            // Non-ASCII: decode one char and check the Unicode whitespace set.
            _ => {
                if let Some(ch) = s.chars().next() {
                    if is_whitespace(ch) {
                        s = &s[ch.len_utf8()..];
                        continue;
                    }
                }
            }
        }
        return s;
    }
    s
}

/// Returns the byte length of the (possibly nested) block comment at the
/// start of `s`, or `None` if the comment is never closed.
///
/// `s` must start with `/*`; `skip` guarantees this.
fn block_comment_len(s: &str) -> Option<usize> {
    debug_assert!(s.starts_with("/*"));
    let bytes = s.as_bytes();
    let mut depth = 0u32;
    let mut i = 0;
    while i + 1 < bytes.len() {
        match (bytes[i], bytes[i + 1]) {
            (b'/', b'*') => {
                depth += 1;
                i += 2;
            }
            (b'*', b'/') => {
                // Cannot underflow: the leading `/*` made depth >= 1 and we
                // return as soon as it drops back to zero.
                depth -= 1;
                if depth == 0 {
                    // Both delimiters are ASCII, so `i + 2` is a char boundary.
                    return Some(i + 2);
                }
                i += 2;
            }
            _ => i += 1,
        }
    }
    None
}

/// Reports whether `ch` is whitespace according to the Rust lexer.
///
/// Rust uses the Unicode `Pattern_White_Space` set, which is narrower than
/// `char::is_whitespace` (`White_Space`): it contains the left-to-right and
/// right-to-left marks but excludes characters such as U+00A0 NO-BREAK SPACE
/// and U+3000 IDEOGRAPHIC SPACE.
fn is_whitespace(ch: char) -> bool {
    matches!(
        ch,
        '\u{0009}' // horizontal tab
        | '\u{000A}' // line feed
        | '\u{000B}' // vertical tab
        | '\u{000C}' // form feed
        | '\u{000D}' // carriage return
        | '\u{0020}' // space
        | '\u{0085}' // next line
        | '\u{200E}' // left-to-right mark
        | '\u{200F}' // right-to-left mark
        | '\u{2028}' // line separator
        | '\u{2029}' // paragraph separator
    )
}