Skip to content

Commit

Permalink
Improve unicode support, add \x?? and \u{?}
Browse files Browse the repository at this point in the history
Add support for "\x??"

Add more tests, fix bug

Finish string escapes

Add non ascii test, fix nits

Remove non-standard escapes

Implement char escapes

Allow NUL
  • Loading branch information
torkleyy committed Feb 13, 2018
1 parent d19c857 commit 2e19ca2
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 79 deletions.
4 changes: 2 additions & 2 deletions src/de/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ pub enum ParseError {
ExpectedStringEnd,
ExpectedIdentifier,

InvalidEscape,
InvalidEscape(&'static str),

NoSuchExtension(String),

Expand Down Expand Up @@ -103,7 +103,7 @@ impl StdError for Error {
ParseError::ExpectedString => "Expected string",
ParseError::ExpectedIdentifier => "Expected identifier",

ParseError::InvalidEscape => "Invalid escape sequence",
ParseError::InvalidEscape(_) => "Invalid escape sequence",

ParseError::Utf8Error(ref e) => e.description(),
ParseError::TrailingCharacters => "Non-whitespace trailing characters",
Expand Down
2 changes: 1 addition & 1 deletion src/de/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ fn test_array() {
assert_eq!(Ok(empty_array), from_str("[]"));

assert_eq!(Ok([2, 3, 4i32]), from_str("(2,3,4,)"));
assert_eq!(Ok(([2, 3, 4i32].to_vec())), from_str("[2,3,4,]"));
assert_eq!(Ok([2, 3, 4i32].to_vec()), from_str("[2,3,4,]"));
}

#[test]
Expand Down
149 changes: 81 additions & 68 deletions src/parse.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::char::from_u32 as char_from_u32;
use std::fmt::{Display, Formatter, Result as FmtResult};
use std::ops::Neg;
use std::result::Result as StdResult;
Expand Down Expand Up @@ -91,13 +92,8 @@ impl<'a> Bytes<'a> {

let c = if c == b'\\' {
let _ = self.advance(1);
let c = self.eat_byte()?;

if c != b'\\' && c != b'\'' {
return self.err(ParseError::InvalidEscape);
}

c as char
self.parse_escape()?
} else {
// Check where the end of the char (') is and try to
// interpret the rest as UTF-8
Expand Down Expand Up @@ -211,6 +207,13 @@ impl<'a> Bytes<'a> {
)
}

/// Reads the next byte with `eat_byte` and checks that it equals `byte`.
///
/// Returns `Ok(())` when the byte matches. On a mismatch the caller-supplied
/// `error` is reported through `self.err`. An error from `eat_byte` itself is
/// propagated unchanged.
pub fn expect_byte(&mut self, byte: u8, error: ParseError) -> Result<()> {
    let found = self.eat_byte()?;

    if found == byte {
        Ok(())
    } else {
        self.err(error)
    }
}

/// Returns the extensions bit mask.
fn extensions(&mut self) -> Result<Extensions> {
if self.peek() != Some(b'#') {
Expand Down Expand Up @@ -335,6 +338,8 @@ impl<'a> Bytes<'a> {
}

pub fn string(&mut self) -> Result<ParsedStr> {
use std::iter::repeat;

if !self.consume("\"") {
return self.err(ParseError::ExpectedString);
}
Expand All @@ -359,7 +364,15 @@ impl<'a> Bytes<'a> {

loop {
let _ = self.advance(i + 1);
self.parse_str_escape(&mut s)?;
let character = self.parse_escape()?;
match character.len_utf8() {
1 => s.push(character as u8),
len => {
let start = s.len();
s.extend(repeat(0).take(len));
character.encode_utf8(&mut s[start..]);
}
}

let (new_i, end_or_escape) = self.bytes
.iter()
Expand Down Expand Up @@ -421,86 +434,75 @@ impl<'a> Bytes<'a> {
res
}

fn decode_hex_escape(&mut self) -> Result<u16> {
fn decode_ascii_escape(&mut self) -> Result<u8> {
let mut n = 0;
for _ in 0..4 {
n = match self.eat_byte()? {
c @ b'0'...b'9' => n * 16_u16 + ((c as u16) - (b'0' as u16)),
b'a' | b'A' => n * 16_u16 + 10_u16,
b'b' | b'B' => n * 16_u16 + 11_u16,
b'c' | b'C' => n * 16_u16 + 12_u16,
b'd' | b'D' => n * 16_u16 + 13_u16,
b'e' | b'E' => n * 16_u16 + 14_u16,
b'f' | b'F' => n * 16_u16 + 15_u16,
_ => {
return self.err(ParseError::InvalidEscape);
}
};
for _ in 0..2 {
n = n << 4;
let byte = self.eat_byte()?;
let decoded = self.decode_hex(byte)?;
n |= decoded;
}

Ok(n)
}

fn parse_str_escape(&mut self, store: &mut Vec<u8>) -> Result<()> {
use std::iter::repeat;
/// Converts a single ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) into its
/// numeric value.
///
/// Any other byte is reported as
/// `ParseError::InvalidEscape("Non-hex digit found")` through `self.err`.
fn decode_hex(&self, c: u8) -> Result<u8> {
    // `to_digit(16)` accepts exactly the ASCII digits and the letters a-f / A-F.
    match (c as char).to_digit(16) {
        // A base-16 digit is at most 15, so narrowing to `u8` cannot truncate.
        Some(digit) => Ok(digit as u8),
        None => self.err(ParseError::InvalidEscape("Non-hex digit found")),
    }
}

match self.eat_byte()? {
b'"' => store.push(b'"'),
b'\\' => store.push(b'\\'),
b'b' => store.push(b'\x08'),
b'f' => store.push(b'\x0c'),
b'n' => store.push(b'\n'),
b'r' => store.push(b'\r'),
b't' => store.push(b'\t'),
fn parse_escape(&mut self) -> Result<char> {
let c = match self.eat_byte()? {
b'\'' => '\'',
b'"' => '"',
b'\\' => '\\',
b'n' => '\n',
b'r' => '\r',
b't' => '\t',
b'x' => self.decode_ascii_escape()? as char,
b'u' => {
let c: char = match self.decode_hex_escape()? {
0xDC00...0xDFFF => {
return self.err(ParseError::InvalidEscape);
}
self.expect_byte(b'{', ParseError::InvalidEscape("Missing {"))?;

n1 @ 0xD800...0xDBFF => {
if self.eat_byte()? != b'\\' {
return self.err(ParseError::InvalidEscape);
}
let mut bytes: u32 = 0;
let mut num_digits = 0;

if self.eat_byte()? != b'u' {
return self.err(ParseError::InvalidEscape);
}
while num_digits < 6 {
let byte = self.peek_or_eof()?;

let n2 = self.decode_hex_escape()?;
if byte == b'}' {
break;
} else {
self.advance_single()?;
}

if n2 < 0xDC00 || n2 > 0xDFFF {
return self.err(ParseError::InvalidEscape);
}
let byte = self.decode_hex(byte)?;
bytes = bytes << 4;
bytes |= byte as u32;

let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
num_digits += 1;
}

match ::std::char::from_u32(n as u32) {
Some(c) => c,
None => {
return self.err(ParseError::InvalidEscape);
}
}
}
if num_digits == 0 {
return self.err(ParseError::InvalidEscape(
"Expected 1-6 digits, got 0 digits",
));
}

n => match ::std::char::from_u32(n as u32) {
Some(c) => c,
None => {
return self.err(ParseError::InvalidEscape);
}
},
};

let char_start = store.len();
store.extend(repeat(0).take(c.len_utf8()));
c.encode_utf8(&mut store[char_start..]);
self.expect_byte(b'}', ParseError::InvalidEscape("No } at the end"))?;
let character = char_from_u32(bytes)
.ok_or_else(|| self.error(ParseError::InvalidEscape("Not a valid char")))?;
character
}
_ => {
return self.err(ParseError::InvalidEscape);
return self.err(ParseError::InvalidEscape("Unknown escape character"));
}
}
};

Ok(())
Ok(c)
}

fn skip_comment(&mut self) -> bool {
Expand Down Expand Up @@ -570,3 +572,14 @@ impl Display for Position {
write!(f, "{}:{}", self.line, self.col)
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The two hex digits `10` must decode to the byte `0x10`.
    #[test]
    fn decode_x10() {
        let mut input = Bytes::new(b"10").unwrap();
        let decoded = input.decode_ascii_escape();

        assert_eq!(decoded, Ok(0x10));
    }
}
16 changes: 8 additions & 8 deletions src/ser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ impl Serializer {
.extend((0..pretty.indent).map(|_| config.indentor.as_str()));
}
}

/// Appends `value` to the output as a double-quoted, escaped string literal.
///
/// Every character is escaped with `char::escape_debug`, except NUL, which is
/// always written as `\u{0}`.
fn serialize_escaped_str(&mut self, value: &str) {
    self.output.push('"');
    for c in value.chars() {
        if c == '\0' {
            // Newer standard libraries render NUL as `\0` in `escape_debug`.
            // The `\u{...}` form keeps the output independent of the toolchain
            // and stays within the escapes the parser is known to accept.
            // NOTE(review): confirm the parser has no `\0` escape before
            // relaxing this.
            self.output.push_str("\\u{0}");
        } else {
            self.output.extend(c.escape_debug());
        }
    }
    self.output.push('"');
}
}

impl<'a> ser::Serializer for &'a mut Serializer {
Expand Down Expand Up @@ -231,14 +237,8 @@ impl<'a> ser::Serializer for &'a mut Serializer {
}

fn serialize_str(self, v: &str) -> Result<()> {
self.output += "\"";
for char in v.chars() {
if char == '\\' || char == '"' {
self.output.push('\\');
}
self.output.push(char);
}
self.output += "\"";
self.serialize_escaped_str(v);

Ok(())
}

Expand Down
70 changes: 70 additions & 0 deletions tests/escape.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
extern crate ron;
extern crate serde;

use std::char::from_u32;
use std::fmt::Debug;

use ron::de::from_str;
use ron::ser::to_string;
use serde::{Deserialize, Serialize};

#[test]
fn test_escape_basic() {
    // Serializing BEL (0x07) is expected to produce the `\u{7}` escape.
    let serialized = to_string(&"\x07").unwrap();
    assert_eq!(serialized, "\"\\u{7}\"");

    // Both the `\x07` and the `\u{7}` spelling must parse back to BEL.
    for &input in ["\"\\x07\"", "\"\\u{7}\""].iter() {
        assert_eq!(from_str::<String>(input).unwrap(), "\x07");
    }
}

/// Serializes `t`, prints the serialized text, parses it back and asserts that
/// the parsed value equals the original.
fn check_same<T>(t: T)
where
    T: Debug + for<'a> Deserialize<'a> + PartialEq + Serialize,
{
    let serialized: String = to_string(&t).unwrap();

    println!("Serialized: \n\n{}\n\n", serialized);

    let round_tripped = from_str(&serialized);
    assert_eq!(round_tripped, Ok(t));
}

#[test]
fn test_ascii_10() {
    // U+0010 is a non-printable control character; it must survive a round trip.
    let control = String::from("\u{10}");
    check_same(control);
}

#[test]
fn test_ascii_chars() {
    // Round-trip every ASCII character except NUL as an individual `char`.
    for code_point in 1..128 {
        if let Some(c) = from_u32(code_point) {
            check_same(c);
        }
    }
}

#[test]
fn test_ascii_string() {
    // One string holding every ASCII character except NUL, in code-point order.
    let all_ascii: String = (1..128).filter_map(from_u32).collect();

    check_same(all_ascii);
}

#[test]
fn test_non_ascii() {
    // Non-ASCII characters are expected to appear unescaped between the double
    // quotes, so the expected text is the input wrapped in `"`.
    assert_eq!(to_string(&"♠").unwrap(), "\"♠\"");
    assert_eq!(to_string(&"ß").unwrap(), "\"ß\"");
    assert_eq!(to_string(&"ä").unwrap(), "\"ä\"");
    assert_eq!(to_string(&"ö").unwrap(), "\"ö\"");
    assert_eq!(to_string(&"ü").unwrap(), "\"ü\"");
}

#[test]
fn test_chars() {
    // Each of these characters is expected to serialize verbatim between
    // single quotes.
    let cases = [
        ('♠', "'♠'"),
        ('ß', "'ß'"),
        ('ä', "'ä'"),
        ('ö', "'ö'"),
        ('ü', "'ü'"),
        ('\u{715}', "'\u{715}'"),
    ];
    for &(ch, expected) in cases.iter() {
        assert_eq!(to_string(&ch).unwrap(), expected);
    }

    // The literal character and its `\u{715}` escape must parse to the same char.
    let literal = from_str::<char>("'\u{715}'").unwrap();
    let escaped = from_str("'\\u{715}'").unwrap();
    assert_eq!(literal, escaped);
}

#[test]
fn test_nul_in_string() {
    // A NUL character embedded in a string must survive a round trip.
    let with_nul = String::from("Hello\0World!");
    check_same(with_nul);
}

0 comments on commit 2e19ca2

Please sign in to comment.