diff --git a/src/de/error.rs b/src/de/error.rs index e21975b0..194e6feb 100644 --- a/src/de/error.rs +++ b/src/de/error.rs @@ -44,7 +44,7 @@ pub enum ParseError { ExpectedStringEnd, ExpectedIdentifier, - InvalidEscape, + InvalidEscape(&'static str), NoSuchExtension(String), @@ -103,7 +103,7 @@ impl StdError for Error { ParseError::ExpectedString => "Expected string", ParseError::ExpectedIdentifier => "Expected identifier", - ParseError::InvalidEscape => "Invalid escape sequence", + ParseError::InvalidEscape(_) => "Invalid escape sequence", ParseError::Utf8Error(ref e) => e.description(), ParseError::TrailingCharacters => "Non-whitespace trailing characters", diff --git a/src/de/tests.rs b/src/de/tests.rs index 6d70c502..eac34a8e 100644 --- a/src/de/tests.rs +++ b/src/de/tests.rs @@ -68,7 +68,7 @@ fn test_array() { assert_eq!(Ok(empty_array), from_str("[]")); assert_eq!(Ok([2, 3, 4i32]), from_str("(2,3,4,)")); - assert_eq!(Ok(([2, 3, 4i32].to_vec())), from_str("[2,3,4,]")); + assert_eq!(Ok([2, 3, 4i32].to_vec()), from_str("[2,3,4,]")); } #[test] diff --git a/src/parse.rs b/src/parse.rs index 2b849cb3..9b774b19 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,3 +1,4 @@ +use std::char::from_u32 as char_from_u32; use std::fmt::{Display, Formatter, Result as FmtResult}; use std::ops::Neg; use std::result::Result as StdResult; @@ -91,13 +92,8 @@ impl<'a> Bytes<'a> { let c = if c == b'\\' { let _ = self.advance(1); - let c = self.eat_byte()?; - if c != b'\\' && c != b'\'' { - return self.err(ParseError::InvalidEscape); - } - - c as char + self.parse_escape()? } else { // Check where the end of the char (') is and try to // interpret the rest as UTF-8 @@ -211,6 +207,13 @@ impl<'a> Bytes<'a> { ) } + pub fn expect_byte(&mut self, byte: u8, error: ParseError) -> Result<()> { + self.eat_byte().and_then(|b| match b == byte { + true => Ok(()), + false => self.err(error), + }) + } + /// Returns the extensions bit mask. fn extensions(&mut self) -> Result { if self.peek() != Some(b'#') { @@ -335,6 +338,8 @@ impl<'a> Bytes<'a> { } pub fn string(&mut self) -> Result { + use std::iter::repeat; + if !self.consume("\"") { return self.err(ParseError::ExpectedString); } @@ -359,7 +364,15 @@ impl<'a> Bytes<'a> { loop { let _ = self.advance(i + 1); - self.parse_str_escape(&mut s)?; + let character = self.parse_escape()?; + match character.len_utf8() { + 1 => s.push(character as u8), + len => { + let start = s.len(); + s.extend(repeat(0).take(len)); + character.encode_utf8(&mut s[start..]); + } + } let (new_i, end_or_escape) = self.bytes .iter() @@ -421,86 +434,75 @@ impl<'a> Bytes<'a> { res } - fn decode_hex_escape(&mut self) -> Result { + fn decode_ascii_escape(&mut self) -> Result { let mut n = 0; - for _ in 0..4 { - n = match self.eat_byte()? { - c @ b'0'...b'9' => n * 16_u16 + ((c as u16) - (b'0' as u16)), - b'a' | b'A' => n * 16_u16 + 10_u16, - b'b' | b'B' => n * 16_u16 + 11_u16, - b'c' | b'C' => n * 16_u16 + 12_u16, - b'd' | b'D' => n * 16_u16 + 13_u16, - b'e' | b'E' => n * 16_u16 + 14_u16, - b'f' | b'F' => n * 16_u16 + 15_u16, - _ => { - return self.err(ParseError::InvalidEscape); - } - }; + for _ in 0..2 { + n = n << 4; + let byte = self.eat_byte()?; + let decoded = self.decode_hex(byte)?; + n |= decoded; } Ok(n) } - fn parse_str_escape(&mut self, store: &mut Vec) -> Result<()> { - use std::iter::repeat; + fn decode_hex(&self, c: u8) -> Result { + match c { + c @ b'0'...b'9' => Ok(c - b'0'), + c @ b'a'...b'f' => Ok(10 + c - b'a'), + c @ b'A'...b'F' => Ok(10 + c - b'A'), + _ => self.err(ParseError::InvalidEscape("Non-hex digit found")), + } + } - match self.eat_byte()? { - b'"' => store.push(b'"'), - b'\\' => store.push(b'\\'), - b'b' => store.push(b'\x08'), - b'f' => store.push(b'\x0c'), - b'n' => store.push(b'\n'), - b'r' => store.push(b'\r'), - b't' => store.push(b'\t'), + fn parse_escape(&mut self) -> Result { + let c = match self.eat_byte()? { + b'\'' => '\'', + b'"' => '"', + b'\\' => '\\', + b'n' => '\n', + b'r' => '\r', + b't' => '\t', + b'x' => self.decode_ascii_escape()? as char, b'u' => { - let c: char = match self.decode_hex_escape()? { - 0xDC00...0xDFFF => { - return self.err(ParseError::InvalidEscape); - } + self.expect_byte(b'{', ParseError::InvalidEscape("Missing {"))?; - n1 @ 0xD800...0xDBFF => { - if self.eat_byte()? != b'\\' { - return self.err(ParseError::InvalidEscape); - } + let mut bytes: u32 = 0; + let mut num_digits = 0; - if self.eat_byte()? != b'u' { - return self.err(ParseError::InvalidEscape); - } + while num_digits < 6 { + let byte = self.peek_or_eof()?; - let n2 = self.decode_hex_escape()?; + if byte == b'}' { + break; + } else { + self.advance_single()?; + } - if n2 < 0xDC00 || n2 > 0xDFFF { - return self.err(ParseError::InvalidEscape); - } + let byte = self.decode_hex(byte)?; + bytes = bytes << 4; + bytes |= byte as u32; - let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000; + num_digits += 1; + } - match ::std::char::from_u32(n as u32) { - Some(c) => c, - None => { - return self.err(ParseError::InvalidEscape); - } - } - } + if num_digits == 0 { + return self.err(ParseError::InvalidEscape( + "Expected 1-6 digits, got 0 digits", + )); + } - n => match ::std::char::from_u32(n as u32) { - Some(c) => c, - None => { - return self.err(ParseError::InvalidEscape); - } - }, - }; - - let char_start = store.len(); - store.extend(repeat(0).take(c.len_utf8())); - c.encode_utf8(&mut store[char_start..]); + self.expect_byte(b'}', ParseError::InvalidEscape("No } at the end"))?; + let character = char_from_u32(bytes) + .ok_or_else(|| self.error(ParseError::InvalidEscape("Not a valid char")))?; + character } _ => { - return self.err(ParseError::InvalidEscape); + return self.err(ParseError::InvalidEscape("Unknown escape character")); } - } + }; - Ok(()) + Ok(c) } fn skip_comment(&mut self) -> bool { @@ -570,3 +572,14 @@ impl Display for Position { write!(f, "{}:{}", self.line, self.col) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn decode_x10() { + let mut bytes = Bytes::new(b"10").unwrap(); + assert_eq!(bytes.decode_ascii_escape(), Ok(0x10)); + } +} diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 42c28ab4..f3b89eab 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -156,6 +156,12 @@ impl Serializer { .extend((0..pretty.indent).map(|_| config.indentor.as_str())); } } + + fn serialize_escaped_str(&mut self, value: &str) { + self.output += "\""; + self.output.extend(value.chars().flat_map(|c| c.escape_debug())); + self.output += "\""; + } } impl<'a> ser::Serializer for &'a mut Serializer { @@ -231,14 +237,8 @@ impl<'a> ser::Serializer for &'a mut Serializer { } fn serialize_str(self, v: &str) -> Result<()> { - self.output += "\""; - for char in v.chars() { - if char == '\\' || char == '"' { - self.output.push('\\'); - } - self.output.push(char); - } - self.output += "\""; + self.serialize_escaped_str(v); + Ok(()) } diff --git a/tests/escape.rs b/tests/escape.rs new file mode 100644 index 00000000..d715e776 --- /dev/null +++ b/tests/escape.rs @@ -0,0 +1,70 @@ +extern crate ron; +extern crate serde; + +use std::char::from_u32; +use std::fmt::Debug; + +use ron::de::from_str; +use ron::ser::to_string; +use serde::{Deserialize, Serialize}; + +#[test] +fn test_escape_basic() { + assert_eq!(to_string(&"\x07").unwrap(), "\"\\u{7}\""); + + assert_eq!(from_str::("\"\\x07\"").unwrap(), "\x07"); + assert_eq!(from_str::("\"\\u{7}\"").unwrap(), "\x07"); +} + +fn check_same(t: T) +where + T: Debug + for<'a> Deserialize<'a> + PartialEq + Serialize, +{ + let s: String = to_string(&t).unwrap(); + + println!("Serialized: \n\n{}\n\n", s); + + assert_eq!(from_str(&s), Ok(t)); +} + +#[test] +fn test_ascii_10() { + check_same("\u{10}".to_owned()); +} + +#[test] +fn test_ascii_chars() { + (1..128).into_iter().flat_map(from_u32).for_each(check_same) +} + +#[test] +fn test_ascii_string() { + let s: String = (1..128).into_iter().flat_map(from_u32).collect(); + + check_same(s); +} + +#[test] +fn test_non_ascii() { + assert_eq!(to_string(&"♠").unwrap(), "\"♠\""); + assert_eq!(to_string(&"ß").unwrap(), "\"ß\""); + assert_eq!(to_string(&"ä").unwrap(), "\"ä\""); + assert_eq!(to_string(&"ö").unwrap(), "\"ö\""); + assert_eq!(to_string(&"ü").unwrap(), "\"ü\""); +} + +#[test] +fn test_chars() { + assert_eq!(to_string(&'♠').unwrap(), "'♠'"); + assert_eq!(to_string(&'ß').unwrap(), "'ß'"); + assert_eq!(to_string(&'ä').unwrap(), "'ä'"); + assert_eq!(to_string(&'ö').unwrap(), "'ö'"); + assert_eq!(to_string(&'ü').unwrap(), "'ü'"); + assert_eq!(to_string(&'\u{715}').unwrap(), "'\u{715}'"); + assert_eq!(from_str::("'\u{715}'").unwrap(), from_str("'\\u{715}'").unwrap()); +} + +#[test] +fn test_nul_in_string() { + check_same("Hello\0World!".to_owned()); +}