Skip to content

Commit 2e1a501

Browse files
committed
syntax: support ES6-style unicode escapes
First half of bootstrapping rust-lang/rfcs#446
1 parent 3a325c6 commit 2e1a501

File tree

7 files changed

+169
-8
lines changed

7 files changed

+169
-8
lines changed

src/libsyntax/parse/lexer/mod.rs

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,15 @@ impl<'a> StringReader<'a> {
764764
}
765765
}
766766

767+
// SNAP c9f6d69
768+
/// Report at `sp` that the fixed-width `\uABCD` and `\U00ABCD12` escapes are
/// deprecated, then attach a help note recommending the braced `\u{ABCD12}`
/// form.
///
/// The calls to this helper in the escape scanner are still commented out,
/// which is why the `unused` lint is silenced here.
// NOTE(review): the `SNAP` marker presumably means "enable after the next
// snapshot compiler" -- confirm before removing the `allow`.
#[allow(unused)]
769+
fn old_escape_warning(&mut self, sp: Span) {
770+
self.span_diagnostic
771+
.span_warn(sp, "\\U00ABCD12 and \\uABCD escapes are deprecated");
772+
self.span_diagnostic
773+
.span_help(sp, "use \\u{ABCD12} escapes instead");
774+
}
775+
767776
/// Scan for a single (possibly escaped) byte or char
768777
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
769778
/// `start` is the position of `first_source_char`, which is already consumed.
@@ -782,12 +791,24 @@ impl<'a> StringReader<'a> {
782791
Some(e) => {
783792
return match e {
784793
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
785-
'x' => self.scan_hex_digits(2u, delim, !ascii_only),
794+
'x' => self.scan_byte_escape(delim, !ascii_only),
786795
'u' if !ascii_only => {
787-
self.scan_hex_digits(4u, delim, false)
796+
if self.curr == Some('{') {
797+
self.scan_unicode_escape(delim)
798+
} else {
799+
let res = self.scan_hex_digits(4u, delim, false);
800+
// SNAP c9f6d69
801+
//let sp = codemap::mk_sp(escaped_pos, self.last_pos);
802+
//self.old_escape_warning(sp);
803+
res
804+
}
788805
}
789806
'U' if !ascii_only => {
790-
self.scan_hex_digits(8u, delim, false)
807+
let res = self.scan_hex_digits(8u, delim, false);
808+
// SNAP c9f6d69
809+
//let sp = codemap::mk_sp(escaped_pos, self.last_pos);
810+
//self.old_escape_warning(sp);
811+
res
791812
}
792813
'\n' if delim == '"' => {
793814
self.consume_whitespace();
@@ -848,6 +869,56 @@ impl<'a> StringReader<'a> {
848869
true
849870
}
850871

872+
/// Scan over a \u{...} escape
873+
///
874+
/// At this point, we have already seen the \ and the u, the { is the current character. We
875+
/// will read at least one digit, and up to 6, and pass over the }.
876+
fn scan_unicode_escape(&mut self, delim: char) -> bool {
877+
self.bump(); // past the {
878+
let start_bpos = self.last_pos;
879+
let mut count: uint = 0;
880+
let mut accum_int = 0;
881+
882+
while !self.curr_is('}') && count <= 6 {
883+
let c = match self.curr {
884+
Some(c) => c,
885+
None => {
886+
self.fatal_span_(start_bpos, self.last_pos,
887+
"unterminated unicode escape (found EOF)");
888+
}
889+
};
890+
accum_int *= 16;
891+
accum_int += c.to_digit(16).unwrap_or_else(|| {
892+
if c == delim {
893+
self.fatal_span_(self.last_pos, self.pos,
894+
"unterminated unicode escape (needed a `}`)");
895+
} else {
896+
self.fatal_span_char(self.last_pos, self.pos,
897+
"illegal character in unicode escape", c);
898+
}
899+
}) as u32;
900+
self.bump();
901+
count += 1;
902+
}
903+
904+
if count > 6 {
905+
self.fatal_span_(start_bpos, self.last_pos,
906+
"overlong unicode escape (can have at most 6 hex digits)");
907+
}
908+
909+
self.bump(); // past the ending }
910+
911+
let mut valid = count >= 1 && count <= 6;
912+
if char::from_u32(accum_int).is_none() {
913+
valid = false;
914+
}
915+
916+
if !valid {
917+
self.fatal_span_(start_bpos, self.last_pos, "illegal unicode character escape");
918+
}
919+
valid
920+
}
921+
851922
/// Scan over a float exponent.
852923
fn scan_float_exponent(&mut self) {
853924
if self.curr_is('e') || self.curr_is('E') {
@@ -1273,6 +1344,10 @@ impl<'a> StringReader<'a> {
12731344
return token::Byte(id);
12741345
}
12751346

1347+
/// Scan the two hex digits that follow a `\x` escape.
///
/// Delegates to `scan_hex_digits` with a fixed digit count of 2, passing
/// `delim` and `below_0x7f_only` through unchanged, and returns its result.
fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
1348+
self.scan_hex_digits(2, delim, below_0x7f_only)
1349+
}
1350+
12761351
fn scan_byte_string(&mut self) -> token::Lit {
12771352
self.bump();
12781353
let start = self.last_pos;

src/libsyntax/parse/mod.rs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -393,16 +393,28 @@ pub fn char_lit(lit: &str) -> (char, int) {
393393
let msg = format!("lexer should have rejected a bad character escape {}", lit);
394394
let msg2 = msg.as_slice();
395395

396-
let esc: |uint| -> Option<(char, int)> = |len|
396+
fn esc(len: uint, lit: &str) -> Option<(char, int)> {
397397
num::from_str_radix(lit.slice(2, len), 16)
398398
.and_then(char::from_u32)
399-
.map(|x| (x, len as int));
399+
.map(|x| (x, len as int))
400+
}
401+
402+
let unicode_escape: || -> Option<(char, int)> = ||
403+
if lit.as_bytes()[2] == b'{' {
404+
let idx = lit.find('}').expect(msg2);
405+
let subslice = lit.slice(3, idx);
406+
num::from_str_radix(subslice, 16)
407+
.and_then(char::from_u32)
408+
.map(|x| (x, subslice.char_len() as int + 4))
409+
} else {
410+
esc(6, lit)
411+
};
400412

401413
// Unicode escapes
402414
return match lit.as_bytes()[1] as char {
403-
'x' | 'X' => esc(4),
404-
'u' => esc(6),
405-
'U' => esc(10),
415+
'x' | 'X' => esc(4, lit),
416+
'u' => unicode_escape(),
417+
'U' => esc(10, lit),
406418
_ => None,
407419
}.expect(msg2);
408420
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// Negative test: the `\u{` escape below is never closed with `}` before the
// string's closing quote, and the annotation names the diagnostic expected.
pub fn main() {
12+
let s = "\u{2603"; //~ ERROR unterminated unicode escape (needed a `}`)
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// Negative test: the escape below carries twelve hex digits, more than the
// six-digit maximum named in the expected diagnostic.
pub fn main() {
12+
let s = "\u{260311111111}"; //~ ERROR overlong unicode escape (can have at most 6 hex digits)
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// Negative test: the escape is well-formed syntactically, but 0xD805 lies in
// the surrogate range and is therefore not a valid `char`.
pub fn main() {
12+
let s = "\u{d805}"; //~ ERROR illegal unicode character escape
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// Negative test: `l` is not a hexadecimal digit, so the escape must be
// rejected with the diagnostic named in the annotation.
pub fn main() {
12+
let s = "\u{lol}"; //~ ERROR illegal character in unicode escape
13+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// Positive test for braced unicode escapes: each `\u{...}` escape must
// compare equal to the character it names.
pub fn main() {
12+
let s = "\u{2603}";
13+
assert_eq!(s, "☃");
14+
15+
// Hex digits may be lower-case, upper-case, or a mix of both.
let s = "\u{2a10}\u{2A01}\u{2Aa0}";
16+
assert_eq!(s, "⨐⨁⪠");
17+
18+
// An escaped backslash followed by `{20}` is not a unicode escape: the
// braces and digits must come through as literal text.
let s = "\\{20}";
19+
let mut correct_s = String::from_str("\\");
20+
correct_s.push_str("{20}");
21+
assert_eq!(s, correct_s.as_slice());
22+
}

0 commit comments

Comments
 (0)