diff --git a/doc/parser.qbk b/doc/parser.qbk index 89124b72..8f1f906f 100644 --- a/doc/parser.qbk +++ b/doc/parser.qbk @@ -41,6 +41,7 @@ [import ../example/user_error_handler.cpp] [import ../test/parser.cpp] [import ../test/parser_rule.cpp] +[import ../test/parser_quoted_string.cpp] [import ../include/boost/parser/concepts.hpp] [import ../include/boost/parser/error_handling_fwd.hpp] @@ -218,6 +219,8 @@ [def _lower_ [globalref boost::parser::lower `lower`]] [def _upper_ [globalref boost::parser::upper `upper`]] +[def _quot_str_ [globalref boost::parser::quoted_string `quoted_string`]] + [def _RES_ ['[^RESOLVE]]`()`] [def _RES_np_ ['[^RESOLVE]]] [def _ATTR_ ['[^ATTR]]`()`] diff --git a/doc/tables.qbk b/doc/tables.qbk index a889bcd7..211749e4 100644 --- a/doc/tables.qbk +++ b/doc/tables.qbk @@ -38,6 +38,8 @@ itself be used as a parser; it must be called. In the table below: * `p`, `p1`, `p2`, ... are parsers. +* `symbols` is a _symbols_t_ object, where `T` is `char` or `char32_t`. + [note The definition of `parsable_range_like` is: [parsable_range_like_concept] @@ -326,6 +328,31 @@ the input they match unless otherwise stated in the table below.] [ _symbols_ is an associative container of key, value pairs. Each key is a _std_str_ and each value has type `T`. In the Unicode parsing path, the strings are considered to be UTF-8 encoded; in the non-Unicode path, no encoding is assumed. _symbols_ Matches the longest prefix `pre` of the input that is equal to one of the keys `k`. If the length `len` of `pre` is zero, and there is no zero-length key, it does not match the input. If `len` is positive, the generated attribute is the value associated with `k`.] [ `T` ] [ Unlike the other entries in this table, _symbols_ is a type, not an object. ]] + + [[ _quot_str_ ] + [ Matches `'"'`, followed by zero or more characters, followed by `'"'`. ] + [ _std_str_ ] + [ The result does not include the quotes. A quote within the string can be written by escaping it with a backslash. 
A backslash within the string can be written by writing two consecutive backslashes. Any other use of a backslash will fail the parse. Skipping is disabled while parsing the entire string, as if using _lexeme_. ]] + + [[ `_quot_str_(c)` ] + [ Matches `c`, followed by zero or more characters, followed by `c`. ] + [ _std_str_ ] + [ The result does not include the `c` quotes. A `c` within the string can be written by escaping it with a backslash. A backslash within the string can be written by writing two consecutive backslashes. Any other use of a backslash will fail the parse. Skipping is disabled while parsing the entire string, as if using _lexeme_. ]] + + [[ `_quot_str_(r)` ] + [ Matches some character `Q` in `r`, followed by zero or more characters, followed by `Q`. ] + [ _std_str_ ] + [ The result does not include the `Q` quotes. A `Q` within the string can be written by escaping it with a backslash. A backslash within the string can be written by writing two consecutive backslashes. Any other use of a backslash will fail the parse. Skipping is disabled while parsing the entire string, as if using _lexeme_. ]] + + [[ `_quot_str_(c, symbols)` ] + [ Matches `c`, followed by zero or more characters, followed by `c`. ] + [ _std_str_ ] + [ The result does not include the `c` quotes. A `c` within the string can be written by escaping it with a backslash. A backslash within the string can be written by writing two consecutive backslashes. A backslash followed by a successful match using `symbols` will be interpreted as the corresponding value produced by `symbols`. Any other use of a backslash will fail the parse. Skipping is disabled while parsing the entire string, as if using _lexeme_. ]] + + [[ `_quot_str_(r, symbols)` ] + [ Matches some character `Q` in `r`, followed by zero or more characters, followed by `Q`. ] + [ _std_str_ ] + [ The result does not include the `Q` quotes. A `Q` within the string can be written by escaping it with a backslash. 
A backslash within the string can be written by writing two consecutive backslashes. A backslash followed by a successful match using `symbols` will be interpreted as the corresponding value produced by `symbols`. Any other use of a backslash will fail the parse. Skipping is disabled while parsing the entire string, as if using _lexeme_. ]] ] [important All the character parsers, like _ch_, _cp_ and _cu_ produce either diff --git a/doc/tutorial.qbk b/doc/tutorial.qbk index 1393a81c..7663cbbe 100644 --- a/doc/tutorial.qbk +++ b/doc/tutorial.qbk @@ -738,6 +738,106 @@ no sense. [endsect] +[section Alternative Parsers] + +Frequently, you need to parse something that might have one of several forms. +`operator|` is overloaded to form alternative parsers. For example: + + namespace bp = boost::parser; + auto const parser_1 = bp::int_ | bp::eps; + +`parser_1` matches an integer, or if that fails, it matches /epsilon/, the +empty string. This is equivalent to writing: + + namespace bp = boost::parser; + auto const parser_2 = -bp::int_; + +However, neither `parser_1` nor `parser_2` is equivalent to writing this: + + namespace bp = boost::parser; + auto const parser_3 = bp::eps | bp::int_; // Does not do what you think. + +The reason is that alternative parsers try each of their subparsers, one at a +time, and stop on the first one that matches. /Epsilon/ matches anything, +since it is zero length and consumes no input. It even matches the end of +input. This means that `parser_3` is equivalent to _e_ by itself. + +[note For this reason, writing `_e_ | p` for any parser p is considered a bug. +Debug builds will assert when `_e_ | p` is encountered. ] + +[warning This kind of error is very common when _e_ is involved, and also very +easy to detect. However, it is possible to write `P1 | P2`, where `P1` is a +prefix of `P2`, such as `int_ | int_ >> int_`, or `repeat(4)[hex_digit] | +repeat(8)[hex_digit]`. 
This is almost certainly an error, but is impossible +to detect in the general case _emdash_ remember that _rs_ can be separately +compiled, and consider a pair of rules whose associated `_def` parsers are +`int_` and `int_ >> int_`, respectively.] + +[endsect] + +[section Parsing Quoted Strings] + +It is very common to need to parse quoted strings. Quoted strings are +slightly tricky, though, when using a skipper (and you should be using a +skipper 99% of the time). You don't want to allow arbitrary whitespace in the +middle of your strings, and you also don't want to remove all whitespace from +your strings. Both of these things will happen with the typical skipper, +_ws_. + +So, here is how most people would write a quoted string parser: + + namespace bp = boost::parser; + const auto string = bp::lexeme['"' >> *(bp::char_ - '"') > '"']; + +Some things to note: + +* the result is a string; + +* the quotes are not included in the result; + +* there is an expectation point before the close-quote; + +* the use of _lexeme_ disables skipping in the parser, and it must be written + around the quotes, not around the `operator*` expression; and + +* there's no way to write a quote in the middle of the string. + +This is a very common pattern. I have written a quoted string parser like +this dozens of times. The parser above is the quick-and-dirty version. A +more robust version would be able to handle escaped quotes within the string, +and then would immediately also need to support escaped escape characters. + +_Parser_ provides _quot_str_ to use in place of this very common pattern. It +supports quote- and escaped-character-escaping, using backslash as the escape +character. + +[quoted_string_example_1_2] + +As common as this use case is, there are very similar use cases that it does +not cover. So, _quot_str_ has some options. If you call it with a single +character, it returns a _quot_str_ that uses that single character as the +quote-character. 
+ +[quoted_string_example_3] + +You can also supply a range of characters. One of the characters from the +range must quote the whole string; mismatches are not allowed. Think of how +Python allows you to quote a string with either `'"'` or `'\''`, but the same +character must be used on both sides. + +[quoted_string_example_4] + +Another common thing to do in a quoted string parser is to recognize escape +sequences. If you have simple escape sequences that do not require any real +parsing, like say the simple escape sequences from C++, you can provide a +_symbols_ object as well. The template parameter `T` to _symbols_t_ must be +`char` or `char32_t`. You don't need to include the escaped backslash or the +escaped quote character, since those always work. + +[quoted_string_example_5] + +[endsect] + [section Parsing In Detail] Now that you've seen some examples, let's see how parsing works in a bit more @@ -1052,43 +1152,6 @@ for more information.] [endsect] -[section Alternative Parsers] - -Frequently, you need to parse something that might have one of several forms. -`operator|` is overloaded to form alternative parsers. For example: - - namespace bp = boost::parser; - auto const parser_1 = bp::int_ | bp::eps; - -`parser_1` matches an integer, or if that fails, it matches /epsilon/, the -empty string. This is equivalent to writing: - - namespace bp = boost::parser; - auto const parser_2 = -bp::int_; - -However, neither `parser_1` nor `parser_2` is equivalent to writing this: - - namespace bp = boost::parser; - auto const parser_3 = bp::eps | bp::int_; // Does not do what you think. - -The reason is that alternative parsers try each of their subparsers, one at a -time, and stop on the first one that matches. /Epsilon/ matches anything, -since it is zero length and consumes no input. It even matches the end of -input. This means that `parser_3` is equivalent to _e_ by itself. - -[note For this reason, writing `_e_ | p` for any parser p is considered a bug. 
-Debug builds will assert when `_e_ | p` is encountered. ] - -[warning This kind of error is very common when _e_ is involved, and also very -easy to detect. However, it is possible to write `P1 >> P2`, where `P1` is a -prefix of `P2`, such as `int_ | int >> int_`, or `repeat(4)[hex_digit] | -repeat(8)[hex_digit]`. This is almost certainly an error, but is impossible -to detect in the general case _emdash_ remember that _rs_ can be separately -compiled, and consider a pair of rules whose associated `_def` parsers are -`int_` and `int_ >> int_`, respectively.] - -[endsect] - [section The Parsers And Their Uses] _Parser_ comes with all the parsers most parsing tasks will ever need. Each diff --git a/include/boost/parser/detail/printing.hpp b/include/boost/parser/detail/printing.hpp index d1031ab1..5f59adf7 100644 --- a/include/boost/parser/detail/printing.hpp +++ b/include/boost/parser/detail/printing.hpp @@ -278,6 +278,13 @@ namespace boost { namespace parser { namespace detail { std::ostream & os, int components = 0); + template + void print_parser( + Context const & context, + quoted_string_parser const & parser, + std::ostream & os, + int components = 0); + template void print_parser( Context const & context, diff --git a/include/boost/parser/detail/printing_impl.hpp b/include/boost/parser/detail/printing_impl.hpp index 4eeaba20..931fe89a 100644 --- a/include/boost/parser/detail/printing_impl.hpp +++ b/include/boost/parser/detail/printing_impl.hpp @@ -482,8 +482,7 @@ namespace boost { namespace parser { namespace detail { template< typename Context, typename ResolvedExpected, - bool Integral = std::is_integral{}, - int SizeofExpected = sizeof(ResolvedExpected)> + bool Integral = std::is_integral{}> struct print_expected_char_impl { static void call( @@ -495,13 +494,17 @@ namespace boost { namespace parser { namespace detail { } }; - template - struct print_expected_char_impl + template + struct print_expected_char_impl { static void - call(Context const & 
context, std::ostream & os, Expected expected) + call(Context const & context, std::ostream & os, char32_t expected) { - std::array cps = {{(char32_t)expected}}; + if (expected == '\'') { + os << "'\\''"; + return; + } + std::array cps = {{expected}}; auto const r = cps | text::as_utf8; os << "'"; for (auto c : r) { @@ -689,6 +692,27 @@ namespace boost { namespace parser { namespace detail { os << "\""; } + template + void print_parser( + Context const & context, + quoted_string_parser const & parser, + std::ostream & os, + int components) + { + os << "quoted_string("; + if constexpr (is_nope_v) { + detail::print_expected_char_impl::call( + context, os, parser.ch_); + } else { + os << '"'; + for (auto c : parser.chs_ | text::as_utf8) { + detail::print_char(os, c); + } + os << '"'; + } + os << ')'; + } + template void print_parser( Context const & context, diff --git a/include/boost/parser/parser.hpp b/include/boost/parser/parser.hpp index 0695df72..8581cd6b 100644 --- a/include/boost/parser/parser.hpp +++ b/include/boost/parser/parser.hpp @@ -1709,7 +1709,8 @@ namespace boost { namespace parser { return text::find(cps.begin(), cps.end(), c_) != cps.end(); } else { - using element_type = decltype(*chars_.begin()); + using element_type = + remove_cv_ref_t; element_type const c = c_; return text::find(chars_.begin(), chars_.end(), c) != chars_.end(); @@ -1730,11 +1731,12 @@ namespace boost { namespace parser { template constexpr auto make_char_range(R && r) noexcept { - if constexpr (std::is_pointer_v>) { + if constexpr (std::is_pointer_v>) { return detail::make_char_range( r, text::null_sentinel); } else { - return detail::make_char_range(r.begin(), r.end()); + return detail::make_char_range( + text::detail::begin(r), text::detail::end(r)); } } @@ -6970,6 +6972,319 @@ namespace boost { namespace parser { return parser_interface{string_parser(str)}; } + template + struct quoted_string_parser + { + constexpr quoted_string_parser() : chs_(), ch_('"') {} + +#if 
BOOST_PARSER_USE_CONCEPTS + template +#else + template< + typename R, + typename Enable = + std::enable_if_t>> +#endif + constexpr quoted_string_parser(R && r) : chs_((R &&) r), ch_(0) + { + // TODO: This becomes ill-formed when + // BOOST_PARSER_NO_RUNTIME_ASSERTIONS is turned on. + BOOST_PARSER_ASSERT(r.begin() != r.end()); + } + +#if BOOST_PARSER_USE_CONCEPTS + template +#else + template< + typename R, + typename Enable = + std::enable_if_t>> +#endif + constexpr quoted_string_parser(R && r, Escapes escapes) : + chs_((R &&) r), escapes_(escapes), ch_(0) + { + BOOST_PARSER_ASSERT(r.begin() != r.end()); + } + + constexpr quoted_string_parser(char32_t cp) : chs_(), ch_(cp) {} + + constexpr quoted_string_parser(char32_t cp, Escapes escapes) : + chs_(), escapes_(escapes), ch_(cp) + {} + + template< + typename Iter, + typename Sentinel, + typename Context, + typename SkipParser> + std::string call( + Iter & first, + Sentinel last, + Context const & context, + SkipParser const & skip, + detail::flags flags, + bool & success) const + { + std::string retval; + call(first, last, context, skip, flags, success, retval); + return retval; + } + + template< + typename Iter, + typename Sentinel, + typename Context, + typename SkipParser, + typename Attribute> + void call( + Iter & first, + Sentinel last, + Context const & context, + SkipParser const & skip, + detail::flags flags, + bool & success, + Attribute & retval) const + { + [[maybe_unused]] auto _ = detail::scoped_trace( + *this, first, last, context, flags, retval); + + if (first == last) { + success = false; + return; + } + + auto const prev_first = first; + + auto append = [&retval, + gen_attrs = detail::gen_attrs(flags)](auto & ctx) { + detail::move_back(retval, _attr(ctx), gen_attrs); + }; + + auto quote_ch = [&]() { + if constexpr (detail::is_nope_v) { + detail::remove_cv_ref_t curr = *first; + if ((char32_t)curr == ch_) + ++first; + else + success = false; + return ch_; + } else { + detail::remove_cv_ref_t const ch 
= *first; + bool found = false; + if constexpr (std:: + is_same_v) { + auto r = chs_ | detail::text::as_utf32; + found = detail::text::find(r.begin(), r.end(), ch) != + r.end(); + } else { + found = detail::text::find( + chs_.begin(), chs_.end(), ch) != chs_.end(); + } + if (found) + ++first; + else + success = false; + return ch; + } + }; + + auto const ch = quote_ch(); + if (!success) + return; + + decltype(ch) const backslash_and_delim[] = {'\\', ch}; + auto const back_delim = char_(backslash_and_delim); + + auto make_parser = [&]() { + if constexpr (detail::is_nope_v) { + return *((lit('\\') >> back_delim) | + (char_ - back_delim))[append] > ch; + } else { + return *((lit('\\') >> back_delim)[append] | + (lit('\\') >> parser_interface(escapes_))[append] | + (char_ - back_delim)[append]) > ch; + } + }; + + auto const p = make_parser(); + p.parser_.call( + first, + last, + context, + skip, + detail::disable_skip(flags), + success); + + if (!success) { + retval = Attribute(); + first = prev_first; + } + } + + /** Returns a `parser_interface` containing a `quoted_string_parser` + that uses `x` as its quotation marks. */ +#if BOOST_PARSER_USE_CONCEPTS + template + // clang-format off + requires (!parsable_range_like) +#else + template< + typename T, + typename Enable = + std::enable_if_t>> +#endif + constexpr auto operator()(T x) const noexcept + // clang-format on + { + if constexpr (!detail::is_nope_v) { + BOOST_PARSER_ASSERT( + (chs_.empty() && ch_ == '"' && + "If you're seeing this, you tried to chain calls on " + "quoted_string, like 'quoted_string('\"')('\\'')'. Quit " + "it!'")); + } + return parser_interface(quoted_string_parser(std::move(x))); + } + + /** Returns a `parser_interface` containing a `quoted_string_parser` + that accepts any of the values in `r` as its quotation marks. If + the input being matched during the parse is a sequence of + `char32_t`, the elements of `r` are transcoded from their presumed + encoding to UTF-32 during the comparison. 
Otherwise, the + character being matched is directly compared to the elements of + `r`. */ +#if BOOST_PARSER_USE_CONCEPTS + template +#else + template< + typename R, + typename Enable = + std::enable_if_t>> +#endif + constexpr auto operator()(R && r) const noexcept + { + BOOST_PARSER_ASSERT((( + !std::is_rvalue_reference_v || + !detail::is_range>)&&"It looks like you tried to pass an rvalue range to " + "quoted_string(). Don't do that, or you'll end up " + "with dangling references.")); + if constexpr (!detail::is_nope_v) { + BOOST_PARSER_ASSERT( + (chs_.empty() && ch_ == '"' && + "If you're seeing this, you tried to chain calls on " + "quoted_string, like " + "'quoted_string(char-range)(char-range)'. Quit it!'")); + } + return parser_interface( + quoted_string_parser( + BOOST_PARSER_SUBRANGE( + detail::make_view_begin(r), detail::make_view_end(r)))); + } + + /** Returns a `parser_interface` containing a `quoted_string_parser` + that uses `x` as its quotation marks. `escapes` provides a list + of strings that may appear after a backslash to form an escape + sequence, and what character(s) each escape sequence represents. + Note that `"\\"` and `"\ch"` are always valid escape sequences. */ +#if BOOST_PARSER_USE_CONCEPTS + template + // clang-format off + requires (!parsable_range_like) +#else + template< + typename T, + typename U, + typename Enable = + std::enable_if_t>> +#endif + auto operator()(T x, symbols const & escapes) const noexcept + // clang-format on + { + if constexpr (!detail::is_nope_v) { + BOOST_PARSER_ASSERT( + (chs_.empty() && ch_ == '"' && + "If you're seeing this, you tried to chain calls on " + "quoted_string, like 'quoted_string('\"')('\\'')'. 
Quit " + "it!'")); + } + auto symbols = symbol_parser(escapes.parser_); + auto parser = + quoted_string_parser( + char32_t(x), symbols); + return parser_interface(parser); + } + + /** Returns a `parser_interface` containing a `quoted_string_parser` + that accepts any of the values in `r` as its quotation marks. If + the input being matched during the parse is a sequence of + `char32_t`, the elements of `r` are transcoded from their presumed + encoding to UTF-32 during the comparison. Otherwise, the + character being matched is directly compared to the elements of + `r`. `escapes` provides a list of strings that may appear after a + backslash to form an escape sequence, and what character(s) each + escape sequence represents. Note that `"\\"` and `"\ch"` are + always valid escape sequences. */ +#if BOOST_PARSER_USE_CONCEPTS + template +#else + template< + typename R, + typename T, + typename Enable = + std::enable_if_t>> +#endif + auto operator()(R && r, symbols const & escapes) const noexcept + { + BOOST_PARSER_ASSERT((( + !std::is_rvalue_reference_v || + !detail::is_range>)&&"It looks like you tried to pass an rvalue range to " + "quoted_string(). Don't do that, or you'll end up " + "with dangling references.")); + if constexpr (!detail::is_nope_v) { + BOOST_PARSER_ASSERT( + (chs_.empty() && ch_ == '"' && + "If you're seeing this, you tried to chain calls on " + "quoted_string, like " + "'quoted_string(char-range)(char-range)'. Quit it!'")); + } + auto symbols = symbol_parser(escapes.parser_); + auto quotes = BOOST_PARSER_SUBRANGE( + detail::make_view_begin(r), detail::make_view_end(r)); + auto parser = + quoted_string_parser( + quotes, symbols); + return parser_interface(parser); + } + + Quotes chs_; + Escapes escapes_; + char32_t ch_; + }; + + /** Parses a string delimited by quotation marks. This parser can be used + to create parsers that accept one or more specific quotation mark + characters. 
By default, the quotation marks are `'"'`; an alternate + quotation mark can be specified by calling this parser with a single + character, or a range of characters. If a range is specified, the + opening quote must be one of the characters specified, and the closing + quote must match the opening quote. Quotation marks may appear within + the string if escaped with a backslash, and a pair of backslashes is + treated as a single escaped backslash; all other backslashes cause the + parse to fail, unless a symbol table is in use. A symbol table can be + provided as a second parameter after the single character or range + described above. The symbol table is used to recognize escape + sequences. Each escape sequence is a backslash followed by a value in + the symbol table. When using a symbol table, any backslash that is + not followed by another backslash, the opening quote character, or a + symbol from the symbol table will cause the parse to fail. Skipping + is disabled during parsing of the entire quoted string, including the + quotation marks. There is an expectation point before the closing + quotation mark. Produces a `std::string` attribute. */ + inline constexpr parser_interface> quoted_string; + /** Returns a parser that matches `str` that produces no attribute. */ #if BOOST_PARSER_USE_CONCEPTS template diff --git a/include/boost/parser/parser_fwd.hpp b/include/boost/parser/parser_fwd.hpp index 3bb33356..b33b0c0a 100644 --- a/include/boost/parser/parser_fwd.hpp +++ b/include/boost/parser/parser_fwd.hpp @@ -342,12 +342,17 @@ namespace boost { namespace parser { character being matched. */ struct digit_parser; - /** Maches a particular string, delimited by an iterator sentinel pair; + /** Matches a particular string, delimited by an iterator sentinel pair; produces no attribute. 
*/ template struct string_parser; - /** Maches an end-of-line (`NewlinesOnly == true`), whitespace + /** Matches a string delimited by quotation marks; produces a + `std::string` attribute. */ + template + struct quoted_string_parser; + + /** Matches an end-of-line (`NewlinesOnly == true`), whitespace (`NewlinesOnly == false`), or (`NoNewlines == true`) blank (whitespace but not newline) code point, based on the Unicode definitions of each (also matches the two code points `"\r\n"`). Produces no @@ -355,7 +360,7 @@ namespace boost { namespace parser { template struct ws_parser; - /** Maches the strings "true" and "false", producing an attribute of + /** Matches the strings "true" and "false", producing an attribute of `true` or `false`, respectively, and fails on any other input. */ struct bool_parser; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6b2ebe6c..23ab84ff 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -62,6 +62,7 @@ add_test_executable(parser_rule_with_params) add_test_executable(parser_action) add_test_executable(parser_action_with_params) add_test_executable(parser_symbol_table) +add_test_executable(parser_quoted_string) add_test_executable(tracing) add_test_executable(parse_empty) add_test_executable(tuple_aggregate) diff --git a/test/parser_quoted_string.cpp b/test/parser_quoted_string.cpp new file mode 100644 index 00000000..c3e3538f --- /dev/null +++ b/test/parser_quoted_string.cpp @@ -0,0 +1,279 @@ +/** + * Copyright (C) 2024 T. Zachary Laine + * + * Distributed under the Boost Software License, Version 1.0. 
(See + * accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ + +#include +#include + +#include + + +namespace bp = boost::parser; + +bp::symbols const cu_escapes = {{"t", '\t'}, {"r", '\r'}, {"n", '\n'}}; +bp::symbols const cp_escapes = { + {"t", '\t'}, {"r", '\r'}, {"n", '\n'}}; + +TEST(quoted_string, basic) +{ + constexpr auto parser = bp::quoted_string; + + { + auto result = bp::parse("", parser, bp::ws); + EXPECT_FALSE(result); + } + + { + auto result = bp::parse(R"("foo")", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo"); + } + + { + auto result = bp::parse(R"("foo\\")", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo\\"); + } + + { + auto result = bp::parse(R"("\"foo\"")", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "\"foo\""); + } +} + +TEST(quoted_string, different_char) +{ + constexpr auto parser = bp::quoted_string('\''); + + { + auto result = bp::parse("", parser, bp::ws); + EXPECT_FALSE(result); + } + + { + auto result = bp::parse(R"('foo')", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo"); + } + + { + auto result = bp::parse(R"('foo\\')", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo\\"); + } + + { + auto result = bp::parse(R"('\'foo\'')", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "'foo'"); + } +} + +TEST(quoted_string, different_char_with_escapes) +{ + { + auto parser = bp::quoted_string('\'', cu_escapes); + + { + auto result = bp::parse("", parser, bp::ws); + EXPECT_FALSE(result); + } + + { + auto result = bp::parse(R"('foo\t')", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo\t"); + } + + { + auto result = bp::parse(R"('foo\x')", parser, bp::ws); + EXPECT_FALSE(result); + } + } + { + auto parser = bp::quoted_string('\'', cp_escapes); + + { + auto result = bp::parse("", parser, bp::ws); + EXPECT_FALSE(result); + } + + { + auto result = bp::parse(R"('\tfoo')", parser, bp::ws); + 
EXPECT_TRUE(result); + EXPECT_EQ(*result, "\tfoo"); + } + + { + auto result = bp::parse(R"('f\xoo')", parser, bp::ws); + EXPECT_FALSE(result); + } + } +} + +TEST(quoted_string, char_set) +{ + constexpr auto parser = bp::quoted_string("'\""); + + { + auto result = bp::parse("", parser, bp::ws); + EXPECT_FALSE(result); + } + + { + EXPECT_FALSE(bp::parse(R"('foo")", parser, bp::ws)); + EXPECT_FALSE(bp::parse(R"("foo')", parser, bp::ws)); + } + + { + auto result = bp::parse(R"('foo')", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo"); + } + { + auto result = bp::parse(R"("foo")", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo"); + } + + { + auto result = bp::parse(R"('foo\\')", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo\\"); + } + { + auto result = bp::parse(R"("foo\\")", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo\\"); + } + + { + auto result = bp::parse(R"('\'foo\'')", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "'foo'"); + } + { + auto result = bp::parse(R"("\"foo\"")", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "\"foo\""); + } + + { + // Can't escape arbitrary characters, only backslash and the quote + // character. 
+ EXPECT_FALSE(bp::parse(R"("\'foo")", parser, bp::ws)); + } +} + +TEST(quoted_string, char_set_with_escapes) +{ + { + auto parser = bp::quoted_string("'\"", cu_escapes); + + { + auto result = bp::parse("", parser, bp::ws); + EXPECT_FALSE(result); + } + + { + EXPECT_FALSE(bp::parse(R"('foo")", parser, bp::ws)); + EXPECT_FALSE(bp::parse(R"("foo')", parser, bp::ws)); + } + + { + auto result = bp::parse(R"('foo\t')", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo\t"); + } + { + auto result = bp::parse(R"("\tfoo")", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "\tfoo"); + } + + { + auto result = bp::parse(R"('foo\x')", parser, bp::ws); + EXPECT_FALSE(result); + } + } + { + auto parser = bp::quoted_string("'\"", cp_escapes); + + { + auto result = bp::parse("", parser, bp::ws); + EXPECT_FALSE(result); + } + + { + EXPECT_FALSE(bp::parse(R"('foo")", parser, bp::ws)); + EXPECT_FALSE(bp::parse(R"("foo')", parser, bp::ws)); + } + + { + auto result = bp::parse(R"('foo\t')", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "foo\t"); + } + { + auto result = bp::parse(R"("\tfoo")", parser, bp::ws); + EXPECT_TRUE(result); + EXPECT_EQ(*result, "\tfoo"); + } + + { + auto result = bp::parse(R"('foo\x')", parser, bp::ws); + EXPECT_FALSE(result); + } + } +} + +TEST(quoted_string, doc_examples) +{ + //[ quoted_string_example_1_2 + namespace bp = boost::parser; + + auto result1 = bp::parse("\"some text\"", bp::quoted_string, bp::ws); + assert(result1); + std::cout << *result1 << "\n"; // Prints: some text + + auto result2 = + bp::parse("\"some \\\"text\\\"\"", bp::quoted_string, bp::ws); + assert(result2); + std::cout << *result2 << "\n"; // Prints: some "text" + //] + + //[ quoted_string_example_3 + auto result3 = bp::parse("!some text!", bp::quoted_string('!'), bp::ws); + assert(result3); + std::cout << *result3 << "\n"; // Prints: some text + //] + + //[ quoted_string_example_4 + auto result4 = bp::parse("'some text'", 
bp::quoted_string("'\""), bp::ws); + assert(result4); + std::cout << *result4 << "\n"; // Prints: some text + //] + + //[ quoted_string_example_5 + // the C++ simple escapes (backslash and the quote char are built in) + bp::symbols const escapes = { + {"'", '\''}, + {"?", '\?'}, + {"a", '\a'}, + {"b", '\b'}, + {"f", '\f'}, + {"n", '\n'}, + {"r", '\r'}, + {"t", '\t'}, + {"v", '\v'}}; + auto result5 = + bp::parse("\"some text\\r\"", bp::quoted_string('"', escapes), bp::ws); + assert(result5); + std::cout << *result5 << "\n"; // Prints (with a CRLF newline): some text + //] +} diff --git a/test/tracing.cpp b/test/tracing.cpp index 24f7f211..9e585ce6 100644 --- a/test/tracing.cpp +++ b/test/tracing.cpp @@ -312,6 +312,15 @@ int main() PARSE(string("h")); + std::cout << "\n\n" + << "----------------------------------------\n" + << "| quoted_string() |\n" + << "----------------------------------------\n"; + + PARSE(quoted_string); + PARSE(quoted_string('\'')); + PARSE(quoted_string("'\"")); + std::cout << "\n\n" << "----------------------------------------\n" << "| eol |\n"