From b458c5dc53c88f4fdd86114bff1c4ddd264d7bb2 Mon Sep 17 00:00:00 2001
From: David Tolnay <dtolnay@gmail.com>
Date: Sun, 2 Aug 2020 20:42:31 -0700
Subject: [PATCH] Match rustc's new shebang handling

---
 src/lib.rs        | 20 ++++++++++-----
 src/whitespace.rs | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 7 deletions(-)
 create mode 100644 src/whitespace.rs
diff --git a/src/lib.rs b/src/lib.rs
index f95d43da84..f3c45746d1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -456,6 +456,9 @@ pub mod parse_macro_input;
 #[cfg(all(feature = "parsing", feature = "printing"))]
 pub mod spanned;
 
+#[cfg(all(feature = "parsing", feature = "full"))]
+mod whitespace;
+
 mod gen {
     /// Syntax tree traversal to walk a shared borrow of a syntax tree.
     ///
@@ -943,13 +946,16 @@ pub fn parse_file(mut content: &str) -> Result<File> {
     }
 
     let mut shebang = None;
-    if content.starts_with("#!") && !content.starts_with("#![") {
-        if let Some(idx) = content.find('\n') {
-            shebang = Some(content[..idx].to_string());
-            content = &content[idx..];
-        } else {
-            shebang = Some(content.to_string());
-            content = "";
+    if content.starts_with("#!") {
+        let rest = whitespace::skip(&content[2..]);
+        if !rest.starts_with('[') {
+            if let Some(idx) = content.find('\n') {
+                shebang = Some(content[..idx].to_string());
+                content = &content[idx..];
+            } else {
+                shebang = Some(content.to_string());
+                content = "";
+            }
         }
     }
 
diff --git a/src/whitespace.rs b/src/whitespace.rs
new file mode 100644
index 0000000000..7be082e1a2
--- /dev/null
+++ b/src/whitespace.rs
@@ -0,0 +1,65 @@
+pub fn skip(mut s: &str) -> &str {
+    'skip: while !s.is_empty() {
+        let byte = s.as_bytes()[0];
+        if byte == b'/' {
+            if s.starts_with("//")
+                && (!s.starts_with("///") || s.starts_with("////"))
+                && !s.starts_with("//!")
+            {
+                if let Some(i) = s.find('\n') {
+                    s = &s[i + 1..];
+                    continue;
+                } else {
+                    return "";
+                }
+            } else if s.starts_with("/**/") {
+                s = &s[4..];
+                continue;
+            } else if s.starts_with("/*")
+                && (!s.starts_with("/**") || s.starts_with("/***"))
+                && !s.starts_with("/*!")
+            {
+                let mut depth = 0;
+                let bytes = s.as_bytes();
+                let mut i = 0;
+                let upper = bytes.len() - 1;
+                while i < upper {
+                    if bytes[i] == b'/' && bytes[i + 1] == b'*' {
+                        depth += 1;
+                        i += 1; // eat '*'
+                    } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
+                        depth -= 1;
+                        if depth == 0 {
+                            s = &s[i + 2..];
+                            continue 'skip;
+                        }
+                        i += 1; // eat '/'
+                    }
+                    i += 1;
+                }
+                return s;
+            }
+        }
+        match byte {
+            b' ' | 0x09..=0x0d => {
+                s = &s[1..];
+                continue;
+            }
+            b if b <= 0x7f => {}
+            _ => {
+                let ch = s.chars().next().unwrap();
+                if is_whitespace(ch) {
+                    s = &s[ch.len_utf8()..];
+                    continue;
+                }
+            }
+        }
+        return s;
+    }
+    s
+}
+
+fn is_whitespace(ch: char) -> bool {
+    // Rust treats left-to-right mark and right-to-left mark as whitespace
+    ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}'
+}