Skip to content

Commit 031a655

Browse files
committed
parsing string values; added tests
1 parent b5f49b8 commit 031a655

File tree

1 file changed

+137
-20
lines changed

1 file changed

+137
-20
lines changed

src/parser.rs

Lines changed: 137 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
// #![deny(missing_docs)]
22

33
use std::collections::HashMap;
4-
54
use std::marker::PhantomData;
65

76
use combine::{Parser, ConsumedResult, Stream};
8-
use combine::char::{tab, char, crlf, string, letter, alpha_num, digit};
7+
use combine::char::{tab, char, crlf, string, letter, alpha_num, digit, hex_digit};
98
use combine::combinator::{between, many, many1, none_of, one_of, or, optional, value, try, parser};
109
use combine::primitives::{ParseError, Error, Consumed};
1110

@@ -439,11 +438,11 @@ make_parser!(
439438
result
440439
})
441440
.and(
442-
// FractionalPart ExponentialPart
441+
// FractionalPart ExponentialPart
443442
try(FractionalPart::new().map(Some).and(ExponentialPart::new().map(Some)))
444-
// FractionalPart
443+
// FractionalPart
445444
.or(FractionalPart::new().map(Some).and(value(None)))
446-
// ExponentialPart
445+
// ExponentialPart
447446
.or(value(None).and(ExponentialPart::new().map(Some)))
448447
)
449448
.map(|(int_part,(opt_fract_part, opt_exp_part)) : (String, (Option<String>,Option<String>))| {
@@ -454,23 +453,23 @@ make_parser!(
454453

455454
match opt_fract_part {
456455
Some(fract_part) => {
457-
// add the fractional part first
456+
// add the fractional part first
458457
result.push_str(&fract_part);
459458

460-
// if exponential part is there, we can just add it
459+
// if exponential part is there, we can just add it
461460
if let Some(exp_part) = opt_exp_part {
462461
result.push_str(&exp_part);
463462
}
464463
},
465464
None => {
466-
// to make rust parse it correctly, it needs to have a fractional number before the exponent part
465+
// to make rust parse it correctly, it needs to have a fractional number before the exponent part
467466
if let Some(exp_part) = opt_exp_part {
468467
result.push_str(".0");
469468
result.push_str(&exp_part);
470469
}
471470
}
472471
}
473-
// finally let rust parse it
472+
// finally let rust parse it
474473
result.parse::<f32>().unwrap()
475474
})
476475
.map(Value::Float)
@@ -518,6 +517,86 @@ make_parser!(
518517
}
519518
);
520519

520+
// Parser for a double-quoted string literal, producing `Value::String`.
//
// The body between the quotes is a sequence of fragments, each of which is
// one of:
//   * a `\uXXXX` escape (`EscapedUnicode`),
//   * a single-character escape such as `\n` (`EscapedCharacter`),
//   * any character other than `"`, `\`, carriage return or line feed.
// Whitespace and line terminators following the closing quote are skipped.
make_parser!(
    StringValue(input: char) -> Value {
        between(
            char('"'),
            char('"'),
            many::<Vec<String>,_>(
                // `try` lets a failed escape attempt backtrack so that the
                // next alternative sees the same input.
                try(EscapedUnicode::new())
                    .or(try(EscapedCharacter::new()))
                    .or(none_of("\"\\\r\n".chars()).map(|c: char| c.to_string()))
            )
        )
        // join the fragments in order; `concat` borrows each piece, so no
        // fragment has to be cloned first
        .map(|fragments: Vec<String>| fragments.concat())
        .map(Value::String)
        .skip(many::<Vec<_>,_>(or(WhiteSpace::new(), LineTerminator::new(&true))))
        .parse_lazy(input)
    }
);
544+
545+
/// Converts one hexadecimal digit character into its numeric value.
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F` and returns the corresponding value
/// in the range `0..=15`. Any other character yields `0`, so callers must
/// validate the input beforehand if they need to tell `'0'` apart from an
/// invalid digit.
fn hex_digit_to_u8(input: char) -> u8 {
    // `to_digit(16)` never returns more than 15, so narrowing to `u8` is lossless.
    input.to_digit(16).map_or(0, |digit| digit as u8)
}
553+
554+
// Parser for a `\uXXXX` escape sequence (exactly four hexadecimal digits).
//
// The four digits are read as one 16-bit number and that number is converted
// to the Unicode scalar value with the same code point, returned as a
// one-character `String`.
//
// Values in the surrogate range (U+D800..=U+DFFF) are not scalar values and
// cannot be stored in a Rust `String`; they are replaced by U+FFFD
// (REPLACEMENT CHARACTER) instead of aborting the parse. Surrogate *pairs*
// are not combined here.
make_parser!(
    EscapedUnicode(input: char) -> String {
        string("\\u")
            .with(
                hex_digit().and(hex_digit()).and(hex_digit()).and(hex_digit())
                    .map(|(((d1, d2), d3), d4)| {
                        // Build the code point from its nibbles, most
                        // significant first. The digits must be combined
                        // into a single number: treating the two halves as
                        // raw UTF-8 bytes is only correct for ASCII and
                        // panics on any byte >= 0x80.
                        let code_point = [d1, d2, d3, d4]
                            .iter()
                            .fold(0u32, |acc, &digit| {
                                (acc << 4) | u32::from(hex_digit_to_u8(digit))
                            });

                        // `from_u32` only rejects surrogates here, since four
                        // hex digits can never exceed U+FFFF.
                        ::std::char::from_u32(code_point)
                            .unwrap_or('\u{FFFD}')
                            .to_string()
                    })
            )
            .parse_lazy(input)
    }
);
577+
578+
// Parser for a single-character escape: a backslash followed by one of
// `"`, `/`, `\`, `b`, `f`, `n`, `r` or `t`.
//
// The letter escapes are translated to the control character they denote;
// the remaining ones (`"`, `/`, `\`) stand for themselves. The result is a
// one-character `String`.
make_parser!(
    EscapedCharacter(input: char) -> String {
        char('\\')
            .and(one_of("\"/\\bfnrt".chars()))
            .map(|(_, escape)| {
                let decoded = match escape {
                    'b' => '\x08', // backspace
                    'f' => '\x0C', // form feed
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    // quote, slash and backslash map to themselves
                    literal => literal,
                };
                decoded.to_string()
            })
            .parse_lazy(input)
    }
);
599+
521600
make_parser!(
522601
NullValue(input: char) -> Value {
523602
string("null")
@@ -574,14 +653,18 @@ make_parser!(
574653

575654
make_parser!(
576655
ObjectValue(input: char, constant: &bool) -> Value {
577-
between(char('{')
578-
.skip(many::<Vec<_>,_>(or(WhiteSpace::new(), LineTerminator::new(&true)))), char('}'), many::<Vec<_>,_>(ObjectField::new(constant)))
656+
between(
657+
char('{')
658+
.skip(many::<Vec<_>,_>(or(WhiteSpace::new(), LineTerminator::new(&true)))),
659+
char('}'),
660+
many::<Vec<_>,_>(ObjectField::new(constant))
661+
)
579662
.skip(many::<Vec<_>,_>(or(WhiteSpace::new(), LineTerminator::new(&true))))
580663
.map(|fields| {
581664
let mut result = HashMap::new();
582665

583666
for (name,value) in fields.into_iter() {
584-
// TODO complain about same name fields?
667+
// TODO complain about same name fields?
585668
result.insert(name, value);
586669
}
587670

@@ -744,8 +827,12 @@ mod tests {
744827

745828
#[test]
746829
fn test_parse_const_listvalue() {
747-
assert_successful_parse!(ListValue::new(&true), "[null]", Value::List(vec![Value::Null]));
748-
assert_successful_parse!(ListValue::new(&true), "[null true false]", Value::List(vec![Value::Null, Value::Boolean(true), Value::Boolean(false)]));
830+
assert_successful_parse!(ListValue::new(&true),
831+
"[null]",
832+
Value::List(vec![Value::Null]));
833+
assert_successful_parse!(ListValue::new(&true),
834+
"[null true false]",
835+
Value::List(vec![Value::Null, Value::Boolean(true), Value::Boolean(false)]));
749836
}
750837

751838
#[test]
@@ -756,9 +843,9 @@ mod tests {
756843
match result {
757844
Err(err) => {
758845
assert!(format!("{}", err).contains("Unexpected `true`"));
759-
},
846+
}
760847
// it should be an error
761-
_ => assert!(false)
848+
_ => assert!(false),
762849
}
763850
}
764851

@@ -768,9 +855,9 @@ mod tests {
768855
match result {
769856
Err(err) => {
770857
assert!(format!("{}", err).contains("Unexpected `false`"));
771-
},
858+
}
772859
// it should be an error
773-
_ => assert!(false)
860+
_ => assert!(false),
774861
}
775862
}
776863

@@ -780,9 +867,9 @@ mod tests {
780867
match result {
781868
Err(err) => {
782869
assert!(format!("{}", err).contains("Unexpected `null`"));
783-
},
870+
}
784871
// it should be an error
785-
_ => assert!(false)
872+
_ => assert!(false),
786873
}
787874
}
788875
}
@@ -805,4 +892,34 @@ mod tests {
805892

806893
assert_successful_parse!(ObjectValue::new(&true), "{ x : 1 }", value);
807894
}
895+
896+
#[test]
fn test_parse_string_unicodeescape() {
    // wraps the expected text in the value the parser should produce
    let string_value = |text: &str| Value::String(text.to_owned());

    // a string consisting of a single `\uXXXX` escape
    assert_successful_parse!(StringValue, "\"\\u0025\"", string_value("%"));
    assert_successful_parse!(StringValue, "\"\\u0040\"", string_value("@"));
}
902+
903+
#[test]
fn test_parse_string_escaped() {
    // wraps the expected text in the value the parser should produce
    let string_value = |text: &str| Value::String(text.to_owned());

    // escapes that stand for the escaped character itself
    assert_successful_parse!(StringValue, "\"\\\"\"", string_value("\""));
    assert_successful_parse!(StringValue, "\"\\\\\"", string_value("\\"));
    assert_successful_parse!(StringValue, "\"\\/\"", string_value("/"));

    // escapes that stand for a control character
    assert_successful_parse!(StringValue, "\"\\b\"", string_value("\x08"));
    assert_successful_parse!(StringValue, "\"\\f\"", string_value("\x0C"));
    assert_successful_parse!(StringValue, "\"\\n\"", string_value("\n"));
    assert_successful_parse!(StringValue, "\"\\r\"", string_value("\r"));
    assert_successful_parse!(StringValue, "\"\\t\"", string_value("\t"));
}
914+
915+
#[test]
fn test_parse_stringvalue() {
    // wraps the expected text in the value the parser should produce
    let string_value = |text: &str| Value::String(text.to_owned());

    // nothing between the quotes
    assert_successful_parse!(StringValue, "\"\"", string_value(""));

    // plain text, and plain text mixed with both kinds of escapes
    assert_successful_parse!(StringValue, "\"hello world\"", string_value("hello world"));
    assert_successful_parse!(StringValue, "\"hello \\u0025\"", string_value("hello %"));
    assert_successful_parse!(StringValue, "\"hello\\n\\u0025\"", string_value("hello\n%"));
}
808925
}

0 commit comments

Comments
 (0)