From ab6bb9a43b7d020d2495e74ad78d372a3e319d86 Mon Sep 17 00:00:00 2001 From: Raphael Grimm Date: Sat, 20 Nov 2021 14:22:56 +0100 Subject: [PATCH] Extend sax parser to optionally accept position information for parsed tokens --- .../nlohmann/detail/input/binary_reader.hpp | 230 +- include/nlohmann/detail/input/lexer.hpp | 14 +- include/nlohmann/detail/input/parser.hpp | 45 +- include/nlohmann/detail/meta/is_sax.hpp | 145 ++ single_include/nlohmann/json.hpp | 434 +++- tests/src/unit-sax-parser-extended.cpp | 1893 +++++++++++++++++ .../unit-sax-parser-store-source-location.cpp | 333 +++ 7 files changed, 2992 insertions(+), 102 deletions(-) create mode 100644 tests/src/unit-sax-parser-extended.cpp create mode 100644 tests/src/unit-sax-parser-store-source-location.cpp diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 832c36ddf8..ea9032c3cf 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -168,8 +168,9 @@ class binary_reader bool parse_bson_internal() { std::int32_t document_size{}; + detail::sax_call_next_token_start_pos(sax, chars_read); get_number(input_format_t::bson, document_size); - + detail::sax_call_next_token_end_pos(sax, chars_read); if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast(-1)))) { return false; @@ -180,6 +181,7 @@ class binary_reader return false; } + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->end_object(); } @@ -277,6 +279,7 @@ class binary_reader case 0x01: // double { double number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(number)); return get_number(input_format_t::bson, number) && sax->number_float(static_cast(number), ""); } @@ -284,7 +287,10 @@ class binary_reader { std::int32_t len{}; string_t value; - return get_number(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value); + 
detail::sax_call_next_token_start_pos(sax, chars_read); + const bool result_get = get_number(input_format_t::bson, len) && get_bson_string(len, value); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->string(value); } case 0x03: // object @@ -301,28 +307,35 @@ class binary_reader { std::int32_t len{}; binary_t value; - return get_number(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value); + detail::sax_call_next_token_start_pos(sax, chars_read); + const bool result_get = get_number(input_format_t::bson, len) && get_bson_binary(len, value); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->binary(value); } case 0x08: // boolean { + detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + 1); return sax->boolean(get() != 0); } case 0x0A: // null { + detail::sax_call_next_token_start_end_pos(sax, chars_read); return sax->null(); } case 0x10: // int32 { std::int32_t value{}; - return get_number(input_format_t::bson, value) && sax->number_integer(value); + detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(value)); + return get_number(input_format_t::bson, value) && sax->number_integer(value); } case 0x12: // int64 { std::int64_t value{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(value)); return get_number(input_format_t::bson, value) && sax->number_integer(value); } @@ -361,14 +374,22 @@ class binary_reader } const std::size_t element_type_parse_position = chars_read; + if (!is_array) + { + detail::sax_call_next_token_start_pos(sax, chars_read); + } if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key))) { return false; } - if (!is_array && !sax->key(key)) + if (!is_array) { - return false; + detail::sax_call_next_token_end_pos(sax, chars_read); + if (!sax->key(key)) + { + return false; + } } if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position))) @@ -390,6 
+411,7 @@ class binary_reader bool parse_bson_array() { std::int32_t document_size{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(std::int32_t)); get_number(input_format_t::bson, document_size); if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast(-1)))) @@ -402,6 +424,7 @@ class binary_reader return false; } + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->end_array(); } @@ -451,29 +474,34 @@ class binary_reader case 0x15: case 0x16: case 0x17: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->number_unsigned(static_cast(current)); case 0x18: // Unsigned integer (one-byte uint8_t follows) { std::uint8_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); } case 0x19: // Unsigned integer (two-byte uint16_t follows) { std::uint16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); } case 0x1A: // Unsigned integer (four-byte uint32_t follows) { std::uint32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); } case 0x1B: // Unsigned integer (eight-byte uint64_t follows) { std::uint64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); } @@ -502,29 +530,34 @@ class binary_reader case 0x35: case 0x36: case 0x37: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->number_integer(static_cast(0x20 - 1 - current)); case 0x38: // Negative integer (one-byte uint8_t follows) { std::uint8_t 
number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - number); } case 0x39: // Negative integer -1-n (two-byte uint16_t follows) { std::uint16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - number); } case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) { std::uint32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - number); } case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) { std::uint64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - static_cast(number)); } @@ -561,7 +594,10 @@ class binary_reader case 0x5F: // Binary data (indefinite length) { binary_t b; - return get_cbor_binary(b) && sax->binary(b); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_cbor_binary(b); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->binary(b); } // UTF-8 string (0x00..0x17 bytes follow) @@ -596,7 +632,10 @@ class binary_reader case 0x7F: // UTF-8 string (indefinite length) { string_t s; - return get_cbor_string(s) && sax->string(s); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_cbor_string(s); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->string(s); } // array (0x00..0x17 data items follow) @@ -624,35 +663,51 @@ class binary_reader case 0x95: case 0x96: case 0x97: + detail::sax_call_next_token_start_end_pos(sax, chars_read 
- 1, chars_read); return get_cbor_array( conditional_static_cast(static_cast(current) & 0x1Fu), tag_handler); case 0x98: // array (one-byte uint8_t for n follows) { std::uint8_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_array(static_cast(len), tag_handler); } case 0x99: // array (two-byte uint16_t for n follow) { std::uint16_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_array(static_cast(len), tag_handler); } case 0x9A: // array (four-byte uint32_t for n follow) { std::uint32_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_array(conditional_static_cast(len), tag_handler); } case 0x9B: // array (eight-byte uint64_t for n follow) { std::uint64_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_array(conditional_static_cast(len), tag_handler); } case 0x9F: // array (indefinite length) + { + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); 
return get_cbor_array(static_cast(-1), tag_handler); + } // map (0x00..0x17 pairs of data items follow) case 0xA0: @@ -679,33 +734,47 @@ class binary_reader case 0xB5: case 0xB6: case 0xB7: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return get_cbor_object(conditional_static_cast(static_cast(current) & 0x1Fu), tag_handler); case 0xB8: // map (one-byte uint8_t for n follows) { std::uint8_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_object(static_cast(len), tag_handler); } case 0xB9: // map (two-byte uint16_t for n follow) { std::uint16_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_object(static_cast(len), tag_handler); } case 0xBA: // map (four-byte uint32_t for n follow) { std::uint32_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_object(conditional_static_cast(len), tag_handler); } case 0xBB: // map (eight-byte uint64_t for n follow) { std::uint64_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + 
detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_object(conditional_static_cast(len), tag_handler); } case 0xBF: // map (indefinite length) + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return get_cbor_object(static_cast(-1), tag_handler); case 0xC6: // tagged item @@ -810,7 +879,10 @@ class binary_reader return parse_cbor_internal(true, tag_handler); } get(); - return get_cbor_binary(b) && sax->binary(b); + detail::sax_call_next_token_start_pos(sax, chars_read); + const bool result_get = get_cbor_binary(b); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->binary(b); } default: // LCOV_EXCL_LINE @@ -820,16 +892,20 @@ class binary_reader } case 0xF4: // false + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(false); case 0xF5: // true + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(true); case 0xF6: // null + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->null(); case 0xF9: // Half-Precision Float (two-byte IEEE 754) { + detail::sax_call_next_token_start_pos(sax, chars_read - 1); const auto byte1_raw = get(); if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number"))) { @@ -871,6 +947,7 @@ class binary_reader return std::ldexp(mant + 1024, exp - 25); } }(); + detail::sax_call_next_token_end_pos(sax, chars_read); return sax->number_float((half & 0x8000u) != 0 ? 
static_cast(-val) : static_cast(val), ""); @@ -879,12 +956,14 @@ class binary_reader case 0xFA: // Single-Precision Float (four-byte IEEE 754) { float number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_float(static_cast(number), ""); } case 0xFB: // Double-Precision Float (eight-byte IEEE 754) { double number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_float(static_cast(number), ""); } @@ -1128,6 +1207,7 @@ class binary_reader } } + detail::sax_call_next_token_start_end_pos(sax, chars_read); return sax->end_array(); } @@ -1153,7 +1233,10 @@ class binary_reader for (std::size_t i = 0; i < len; ++i) { get(); - if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_cbor_string(key); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -1169,7 +1252,10 @@ class binary_reader { while (get() != 0xFF) { - if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_cbor_string(key); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -1183,6 +1269,7 @@ class binary_reader } } + detail::sax_call_next_token_start_end_pos(sax, chars_read); return sax->end_object(); } @@ -1330,6 +1417,7 @@ class binary_reader case 0x7D: case 0x7E: case 0x7F: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->number_unsigned(static_cast(current)); // fixmap @@ -1349,6 +1437,7 @@ class binary_reader case 0x8D: case 0x8E: case 0x8F: + 
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return get_msgpack_object(conditional_static_cast(static_cast(current) & 0x0Fu)); // fixarray @@ -1368,6 +1457,7 @@ class binary_reader case 0x9D: case 0x9E: case 0x9F: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return get_msgpack_array(conditional_static_cast(static_cast(current) & 0x0Fu)); // fixstr @@ -1408,16 +1498,22 @@ class binary_reader case 0xDB: // str 32 { string_t s; - return get_msgpack_string(s) && sax->string(s); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_msgpack_string(s); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->string(s); } case 0xC0: // nil + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->null(); case 0xC2: // false + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(false); case 0xC3: // true + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(true); case 0xC4: // bin 8 @@ -1433,90 +1529,107 @@ class binary_reader case 0xD8: // fixext 16 { binary_t b; - return get_msgpack_binary(b) && sax->binary(b); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_msgpack_binary(b); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->binary(b); } case 0xCA: // float 32 { float number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast(number), ""); } case 0xCB: // float 64 { double number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast(number), ""); } case 0xCC: // uint 8 { std::uint8_t number{}; + 
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); } case 0xCD: // uint 16 { std::uint16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); } case 0xCE: // uint 32 { std::uint32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); } case 0xCF: // uint 64 { std::uint64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); } case 0xD0: // int 8 { std::int8_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_integer(number); } case 0xD1: // int 16 { std::int16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_integer(number); } case 0xD2: // int 32 { std::int32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_integer(number); } case 0xD3: // int 64 { std::int64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_integer(number); } case 0xDC: // array 16 { std::uint16_t len{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast(len)); } 
case 0xDD: // array 32 { std::uint32_t len{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_array(conditional_static_cast(len)); } case 0xDE: // map 16 { std::uint16_t len{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast(len)); } case 0xDF: // map 32 { std::uint32_t len{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_object(conditional_static_cast(len)); } @@ -1553,6 +1666,7 @@ class binary_reader case 0xFD: case 0xFE: case 0xFF: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->number_integer(static_cast(current)); default: // anything else @@ -1783,6 +1897,7 @@ class binary_reader } } + detail::sax_call_next_token_start_end_pos(sax, chars_read); return sax->end_array(); } @@ -1801,7 +1916,10 @@ class binary_reader for (std::size_t i = 0; i < len; ++i) { get(); - if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_msgpack_string(key); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -1813,6 +1931,7 @@ class binary_reader key.clear(); } + detail::sax_call_next_token_start_end_pos(sax, chars_read); return sax->end_object(); } @@ -2175,7 +2294,6 @@ class binary_reader return true; } } - string_t key = "_ArraySize_"; if (JSON_HEDLEY_UNLIKELY(!sax->start_object(3) || !sax->key(key) || !sax->start_array(dim.size()))) { @@ -2236,7 +2354,6 @@ class binary_reader bool is_ndarray = false; get_ignore_noop(); - if (current == '$') { result.second = get(); // must not ignore 'N', because 'N' maybe the 
type @@ -2265,7 +2382,9 @@ class binary_reader exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr)); } + // detail::sax_call_next_token_start_pos(sax, chars_read - 1); const bool is_error = get_ubjson_size_value(result.first, is_ndarray); + //detail::sax_call_next_token_end_pos(sax, chars_read); if (input_format == input_format_t::bjdata && is_ndarray) { if (inside_ndarray) @@ -2280,7 +2399,9 @@ class binary_reader if (current == '#') { + // detail::sax_call_next_token_start_pos(sax, chars_read - 1); const bool is_error = get_ubjson_size_value(result.first, is_ndarray); + // detail::sax_call_next_token_end_pos(sax, chars_read); if (input_format == input_format_t::bjdata && is_ndarray) { return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read, @@ -2289,6 +2410,7 @@ class binary_reader return is_error; } + // detail::sax_call_next_token_start_end_pos(sax, chars_read - 2, chars_read - 1); return true; } @@ -2304,40 +2426,47 @@ class binary_reader return unexpect_eof(input_format, "value"); case 'T': // true + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(true); case 'F': // false + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(false); case 'Z': // null + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->null(); case 'U': { std::uint8_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_unsigned(number); } case 'i': { std::int8_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_integer(number); } case 'I': { std::int16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + 
sizeof(number)); return get_number(input_format, number) && sax->number_integer(number); } case 'l': { std::int32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_integer(number); } - case 'L': { std::int64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_integer(number); } @@ -2348,6 +2477,7 @@ class binary_reader break; } std::uint16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_unsigned(number); } @@ -2358,6 +2488,7 @@ class binary_reader break; } std::uint32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_unsigned(number); } @@ -2368,11 +2499,13 @@ class binary_reader break; } std::uint64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_unsigned(number); } case 'h': { + detail::sax_call_next_token_start_pos(sax, chars_read - 1); if (input_format != input_format_t::bjdata) { break; @@ -2418,25 +2551,30 @@ class binary_reader return std::ldexp(mant + 1024, exp - 25); } }(); + detail::sax_call_next_token_end_pos(sax, chars_read); return sax->number_float((half & 0x8000u) != 0 ? 
static_cast(-val) - : static_cast(val), ""); + : static_cast(val), + ""); } case 'd': { float number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_float(static_cast(number), ""); } case 'D': { double number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_float(static_cast(number), ""); } case 'H': { + // call to detail::sax_call_next_token_start_end_pos inside of the method return get_ubjson_high_precision_number(); } @@ -2454,19 +2592,25 @@ class binary_reader exception_message(input_format, concat("byte after 'C' must be in range 0x00..0x7F; last byte: 0x", last_token), "char"), nullptr)); } string_t s(1, static_cast(current)); + detail::sax_call_next_token_start_end_pos(sax, chars_read - 2, chars_read); return sax->string(s); } case 'S': // string { string_t s; - return get_ubjson_string(s) && sax->string(s); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_ubjson_string(s); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->string(s); } case '[': // array + // call to detail::sax_call_next_token_start_end_pos inside of the method return get_ubjson_array(); case '{': // object + // call to detail::sax_call_next_token_start_end_pos inside of the method return get_ubjson_object(); default: // anything else @@ -2481,6 +2625,7 @@ class binary_reader */ bool get_ubjson_array() { + detail::sax_call_next_token_start_pos(sax, chars_read - 1); std::pair size_and_type; if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) { @@ -2505,6 +2650,7 @@ class binary_reader exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr)); } + detail::sax_call_next_token_end_pos(sax, chars_read); string_t type = it->second; // sax->string() takes a reference if 
(JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type))) { @@ -2516,6 +2662,7 @@ class binary_reader size_and_type.second = 'U'; } + detail::sax_call_next_token_start_end_pos(sax, chars_read); key = "_ArrayData_"; if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) )) { @@ -2524,17 +2671,20 @@ class binary_reader for (std::size_t i = 0; i < size_and_type.first; ++i) { + // call to detail::sax_call_next_token_start_end_pos inside of the method if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) { return false; } } + detail::sax_call_next_token_start_end_pos(sax, chars_read); return (sax->end_array() && sax->end_object()); } if (size_and_type.first != npos) { + detail::sax_call_next_token_end_pos(sax, chars_read); if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) { return false; @@ -2546,6 +2696,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { + // call to detail::sax_call_next_token_start_end_pos inside of the method if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) { return false; @@ -2557,6 +2708,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { + // call to detail::sax_call_next_token_start_end_pos inside of the method if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) { return false; @@ -2566,6 +2718,7 @@ class binary_reader } else { + detail::sax_call_next_token_end_pos(sax, chars_read - 1); if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast(-1)))) { return false; @@ -2581,6 +2734,7 @@ class binary_reader } } + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->end_array(); } @@ -2589,6 +2743,7 @@ class binary_reader */ bool get_ubjson_object() { + detail::sax_call_next_token_start_pos(sax, chars_read - 1); std::pair size_and_type; if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) { @@ -2606,6 +2761,7 @@ class binary_reader string_t key; if 
(size_and_type.first != npos) { + detail::sax_call_next_token_end_pos(sax, chars_read - 1); if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first))) { return false; @@ -2615,7 +2771,10 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_ubjson_string(key); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -2630,7 +2789,10 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_ubjson_string(key); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -2644,6 +2806,7 @@ class binary_reader } else { + detail::sax_call_next_token_end_pos(sax, chars_read - 1); if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast(-1)))) { return false; @@ -2651,7 +2814,10 @@ class binary_reader while (current != '}') { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_ubjson_string(key, false); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -2664,6 +2830,7 @@ class binary_reader } } + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->end_object(); } @@ -2672,6 +2839,7 @@ class binary_reader bool get_ubjson_high_precision_number() { + detail::sax_call_next_token_start_pos(sax, chars_read - 1); // get size of following number string std::size_t size{}; bool no_ndarray = true; @@ -2692,6 +2860,7 @@ 
class binary_reader } number_vector.push_back(static_cast(current)); } + detail::sax_call_next_token_end_pos(sax, chars_read); // parse number string using ia_type = decltype(detail::input_adapter(number_vector)); @@ -2889,6 +3058,7 @@ class binary_reader { if (JSON_HEDLEY_UNLIKELY(current == std::char_traits::eof())) { + detail::sax_call_next_token_end_pos(sax, chars_read); return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr)); } diff --git a/include/nlohmann/detail/input/lexer.hpp b/include/nlohmann/detail/input/lexer.hpp index 72e9951081..174092b0d9 100644 --- a/include/nlohmann/detail/input/lexer.hpp +++ b/include/nlohmann/detail/input/lexer.hpp @@ -1506,13 +1506,13 @@ class lexer : public lexer_base while (current == ' ' || current == '\t' || current == '\n' || current == '\r'); } - token_type scan() + bool scan_start() { // initially, skip the BOM if (position.chars_read_total == 0 && !skip_bom()) { error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; - return token_type::parse_error; + return false; } // read next character and ignore whitespace @@ -1523,13 +1523,17 @@ class lexer : public lexer_base { if (!scan_comment()) { - return token_type::parse_error; + return false; } // skip following whitespace skip_whitespace(); } + return true; + } + token_type scan_end() + { switch (current) { // structural characters @@ -1593,6 +1597,10 @@ class lexer : public lexer_base return token_type::parse_error; } } + token_type scan() + { + return !scan_start() ? 
token_type::parse_error : scan_end(); + } private: /// input adapter diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 8acbd4fcad..af20e3167d 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -76,8 +76,6 @@ class parser , m_lexer(std::move(adapter), skip_comments) , allow_exceptions(allow_exceptions_) { - // read first token - get_token(); } /*! @@ -98,7 +96,7 @@ class parser sax_parse_internal(&sdp); // in strict mode, input must be completely read - if (strict && (get_token() != token_type::end_of_input)) + if (strict && (get_token(&sdp) != token_type::end_of_input)) { sdp.parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -126,7 +124,7 @@ class parser sax_parse_internal(&sdp); // in strict mode, input must be completely read - if (strict && (get_token() != token_type::end_of_input)) + if (strict && (get_token(&sdp) != token_type::end_of_input)) { sdp.parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -164,7 +162,7 @@ class parser const bool result = sax_parse_internal(sax); // strict mode: next byte must be EOF - if (result && strict && (get_token() != token_type::end_of_input)) + if (result && strict && (get_token(sax) != token_type::end_of_input)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -185,6 +183,8 @@ class parser // value to avoid a goto (see comment where set to true) bool skip_to_state_evaluation = false; + // read first token + get_token(sax); while (true) { if (!skip_to_state_evaluation) @@ -200,7 +200,7 @@ class parser } // closing } -> we are done - if (get_token() == token_type::end_object) + if (get_token(sax) == token_type::end_object) { if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) { @@ -222,7 +222,7 @@ class parser } // parse separator (:) - if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) + if (JSON_HEDLEY_UNLIKELY(get_token(sax) != 
token_type::name_separator)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -233,7 +233,7 @@ class parser states.push_back(false); // parse values - get_token(); + get_token(sax); continue; } @@ -245,7 +245,7 @@ class parser } // closing ] -> we are done - if (get_token() == token_type::end_array) + if (get_token(sax) == token_type::end_array) { if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) { @@ -372,10 +372,10 @@ class parser if (states.back()) // array { // comma -> next value - if (get_token() == token_type::value_separator) + if (get_token(sax) == token_type::value_separator) { // parse a new value - get_token(); + get_token(sax); continue; } @@ -405,10 +405,10 @@ class parser // states.back() is false -> object // comma -> next value - if (get_token() == token_type::value_separator) + if (get_token(sax) == token_type::value_separator) { // parse key - if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) + if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::value_string)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -421,7 +421,7 @@ class parser } // parse separator (:) - if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) + if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::name_separator)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -429,7 +429,7 @@ class parser } // parse values - get_token(); + get_token(sax); continue; } @@ -457,10 +457,19 @@ class parser } } - /// get next token from lexer - token_type get_token() + /// get next token from lexer and pass position info to sax (if it is accepted) + template + token_type get_token(SAX* sax) { - return last_token = m_lexer.scan(); + if (!m_lexer.scan_start()) + { + last_token = token_type::parse_error; + return token_type::parse_error; + } + detail::sax_call_next_token_start_pos(sax, m_lexer); + last_token = m_lexer.scan_end(); + 
detail::sax_call_next_token_end_pos(sax, m_lexer); + return last_token; } std::string exception_message(const token_type expected, const std::string& context) diff --git a/include/nlohmann/detail/meta/is_sax.hpp b/include/nlohmann/detail/meta/is_sax.hpp index 2150089632..fd05864340 100644 --- a/include/nlohmann/detail/meta/is_sax.hpp +++ b/include/nlohmann/detail/meta/is_sax.hpp @@ -19,6 +19,151 @@ NLOHMANN_JSON_NAMESPACE_BEGIN namespace detail { +// helper struct to call sax->next_token_start +//(we want this functionality as a type to ease passing it as template argument) +struct sax_call_next_token_start_pos_direct +{ + template + static auto call(SAX* sax, Ts&& ...ts) + -> decltype(sax->next_token_start(std::forward(ts)...)) + { + sax->next_token_start(std::forward(ts)...); + } +}; +// helper struct to call sax->next_token_end +// (we want this functionality as a type to ease passing it as template argument) +struct sax_call_next_token_end_pos_direct +{ + template + static auto call(SAX* sax, Ts&& ...ts) + -> decltype(sax->next_token_end(std::forward(ts)...)) + { + sax->next_token_end(std::forward(ts)...); + } +}; + +// dispatch the calls to next_token_start next_token_end +// and drop the calls if the sax parser does not support these methods. 
+// +// DirectCaller can be set to one of sax_call_next_token_{start,end}_pos_direct to +// determine which method is called +template +struct sax_call_function +{ + // is the parameter a lexer or a position + static constexpr bool no_lexer = std::is_same::value; + + template + using call_t = decltype(DirectCaller::call(std::declval(), std::declval()...)); + + //the sax parser supports calls with a position + static constexpr bool detected_call_with_pos = + is_detected_exact::value; + + //the sax parser supports calls with a lexer + static constexpr bool detected_call_with_lex = + !no_lexer && + is_detected_exact::value; + + //there either has to be a version accepting a lexer or a position + static constexpr bool valid = detected_call_with_pos || detected_call_with_lex; + + //called with pos and pos is method supported -> pass data on + template + static typename std::enable_if < + sax_call_function::valid && + std::is_same::value && + sax_call_function::detected_call_with_pos + >::type + call(SaxT* sax, std::size_t pos) + { + DirectCaller::call(sax, pos); + } + + //the sax parser has no version of the method -> drop call + template + static typename std::enable_if < + std::is_same::value && + !sax_call_function::valid + >::type + call(SaxT* /*unused*/, const LexOrPos& /*unused*/) {} + + //called with lex and lex method is supported -> pass data on + template + static typename std::enable_if < + sax_call_function::valid && + std::is_same::value && + !sax_call_function::no_lexer && + sax_call_function::detected_call_with_lex + >::type + call(SaxT* sax, const LexOrPos& lex) + { + DirectCaller::call(sax, lex); + } + + // called with lex and only pos method is supported -> call with position from lexer + // the start pos in the lexer is last read char -> chars_read_total-1 + template + static typename std::enable_if < + sax_call_function::valid && + std::is_same::value && + !sax_call_function::no_lexer && + !sax_call_function::detected_call_with_lex && + 
std::is_same::value + >::type + call(SaxT* sax, const LexOrPos& lex) + { + DirectCaller::call(sax, lex.get_position().chars_read_total - 1); + } + + // called with lex and only pos method is supported -> call with position from lexer + // the one past end pos in the lexer is the current index -> chars_read_total + template + static typename std::enable_if < + sax_call_function::valid && + std::is_same::value && + !sax_call_function::no_lexer && + !sax_call_function::detected_call_with_lex && + std::is_same::value + >::type + call(SaxT* sax, const LexOrPos& lex) + { + DirectCaller::call(sax, lex.get_position().chars_read_total); + } +}; + +//set the element start pos of a sax parser by calling any version of sax->next_token_start (if available) +template +void sax_call_next_token_start_pos(SAX* sax, const LexOrPos& lexOrPos) +{ + using call_t = sax_call_function; + call_t::call(sax, lexOrPos); +} +//set the element end pos of a sax parser by calling any version of sax->next_token_end (if available) +template +void sax_call_next_token_end_pos(SAX* sax, const LexOrPos& lexOrPos) +{ + using call_t = sax_call_function; + call_t::call(sax, lexOrPos); +} +//set the element start end pos of a sax parser by calling any version of +// sax->next_token_start and sax->next_token_end (if available) +template +void sax_call_next_token_start_end_pos(SAX* sax, const LexOrPos1& lexOrPos1, const LexOrPos2& lexOrPos2) +{ + sax_call_next_token_start_pos(sax, lexOrPos1); + sax_call_next_token_end_pos(sax, lexOrPos2); +} +//set the element start end pos of a sax parser by calling any version of +// sax->next_token_start and sax->next_token_end (if available) +template +void sax_call_next_token_start_end_pos(SAX* sax, const LexOrPos& lexOrPos) +{ + sax_call_next_token_start_pos(sax, lexOrPos); + sax_call_next_token_end_pos(sax, lexOrPos); +} + + template using null_function_t = decltype(std::declval().null()); diff --git a/single_include/nlohmann/json.hpp 
b/single_include/nlohmann/json.hpp index e11f529688..ae7aacbccd 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8811,13 +8811,13 @@ class lexer : public lexer_base while (current == ' ' || current == '\t' || current == '\n' || current == '\r'); } - token_type scan() + bool scan_start() { // initially, skip the BOM if (position.chars_read_total == 0 && !skip_bom()) { error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; - return token_type::parse_error; + return false; } // read next character and ignore whitespace @@ -8828,13 +8828,17 @@ class lexer : public lexer_base { if (!scan_comment()) { - return token_type::parse_error; + return false; } // skip following whitespace skip_whitespace(); } + return true; + } + token_type scan_end() + { switch (current) { // structural characters @@ -8898,6 +8902,10 @@ class lexer : public lexer_base return token_type::parse_error; } } + token_type scan() + { + return !scan_start() ? token_type::parse_error : scan_end(); + } private: /// input adapter @@ -8963,6 +8971,151 @@ NLOHMANN_JSON_NAMESPACE_END NLOHMANN_JSON_NAMESPACE_BEGIN namespace detail { +// helper struct to call sax->next_token_start +//(we want this functionality as a type to ease passing it as template argument) +struct sax_call_next_token_start_pos_direct +{ + template + static auto call(SAX* sax, Ts&& ...ts) + -> decltype(sax->next_token_start(std::forward(ts)...)) + { + sax->next_token_start(std::forward(ts)...); + } +}; +// helper struct to call sax->next_token_end +// (we want this functionality as a type to ease passing it as template argument) +struct sax_call_next_token_end_pos_direct +{ + template + static auto call(SAX* sax, Ts&& ...ts) + -> decltype(sax->next_token_end(std::forward(ts)...)) + { + sax->next_token_end(std::forward(ts)...); + } +}; + +// dispatch the calls to next_token_start next_token_end +// and drop the calls if the sax parser does not support these methods. 
+// +// DirectCaller can be set to one of sax_call_next_token_{start,end}_pos_direct to +// determine which method is called +template +struct sax_call_function +{ + // is the parameter a lexer or a position + static constexpr bool no_lexer = std::is_same::value; + + template + using call_t = decltype(DirectCaller::call(std::declval(), std::declval()...)); + + //the sax parser supports calls with a position + static constexpr bool detected_call_with_pos = + is_detected_exact::value; + + //the sax parser supports calls with a lexer + static constexpr bool detected_call_with_lex = + !no_lexer && + is_detected_exact::value; + + //there either has to be a version accepting a lexer or a position + static constexpr bool valid = detected_call_with_pos || detected_call_with_lex; + + //called with pos and pos is method supported -> pass data on + template + static typename std::enable_if < + sax_call_function::valid && + std::is_same::value && + sax_call_function::detected_call_with_pos + >::type + call(SaxT* sax, std::size_t pos) + { + DirectCaller::call(sax, pos); + } + + //the sax parser has no version of the method -> drop call + template + static typename std::enable_if < + std::is_same::value && + !sax_call_function::valid + >::type + call(SaxT* /*unused*/, const LexOrPos& /*unused*/) {} + + //called with lex and lex method is supported -> pass data on + template + static typename std::enable_if < + sax_call_function::valid && + std::is_same::value && + !sax_call_function::no_lexer && + sax_call_function::detected_call_with_lex + >::type + call(SaxT* sax, const LexOrPos& lex) + { + DirectCaller::call(sax, lex); + } + + // called with lex and only pos method is supported -> call with position from lexer + // the start pos in the lexer is last read char -> chars_read_total-1 + template + static typename std::enable_if < + sax_call_function::valid && + std::is_same::value && + !sax_call_function::no_lexer && + !sax_call_function::detected_call_with_lex && + 
std::is_same::value + >::type + call(SaxT* sax, const LexOrPos& lex) + { + DirectCaller::call(sax, lex.get_position().chars_read_total - 1); + } + + // called with lex and only pos method is supported -> call with position from lexer + // the one past end pos in the lexer is the current index -> chars_read_total + template + static typename std::enable_if < + sax_call_function::valid && + std::is_same::value && + !sax_call_function::no_lexer && + !sax_call_function::detected_call_with_lex && + std::is_same::value + >::type + call(SaxT* sax, const LexOrPos& lex) + { + DirectCaller::call(sax, lex.get_position().chars_read_total); + } +}; + +//set the element start pos of a sax parser by calling any version of sax->next_token_start (if available) +template +void sax_call_next_token_start_pos(SAX* sax, const LexOrPos& lexOrPos) +{ + using call_t = sax_call_function; + call_t::call(sax, lexOrPos); +} +//set the element end pos of a sax parser by calling any version of sax->next_token_end (if available) +template +void sax_call_next_token_end_pos(SAX* sax, const LexOrPos& lexOrPos) +{ + using call_t = sax_call_function; + call_t::call(sax, lexOrPos); +} +//set the element start end pos of a sax parser by calling any version of +// sax->next_token_start and sax->next_token_end (if available) +template +void sax_call_next_token_start_end_pos(SAX* sax, const LexOrPos1& lexOrPos1, const LexOrPos2& lexOrPos2) +{ + sax_call_next_token_start_pos(sax, lexOrPos1); + sax_call_next_token_end_pos(sax, lexOrPos2); +} +//set the element start end pos of a sax parser by calling any version of +// sax->next_token_start and sax->next_token_end (if available) +template +void sax_call_next_token_start_end_pos(SAX* sax, const LexOrPos& lexOrPos) +{ + sax_call_next_token_start_pos(sax, lexOrPos); + sax_call_next_token_end_pos(sax, lexOrPos); +} + + template using null_function_t = decltype(std::declval().null()); @@ -9246,8 +9399,9 @@ class binary_reader bool parse_bson_internal() { 
std::int32_t document_size{}; + detail::sax_call_next_token_start_pos(sax, chars_read); get_number(input_format_t::bson, document_size); - + detail::sax_call_next_token_end_pos(sax, chars_read); if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast(-1)))) { return false; @@ -9258,6 +9412,7 @@ class binary_reader return false; } + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->end_object(); } @@ -9355,6 +9510,7 @@ class binary_reader case 0x01: // double { double number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(number)); return get_number(input_format_t::bson, number) && sax->number_float(static_cast(number), ""); } @@ -9362,7 +9518,10 @@ class binary_reader { std::int32_t len{}; string_t value; - return get_number(input_format_t::bson, len) && get_bson_string(len, value) && sax->string(value); + detail::sax_call_next_token_start_pos(sax, chars_read); + const bool result_get = get_number(input_format_t::bson, len) && get_bson_string(len, value); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->string(value); } case 0x03: // object @@ -9379,28 +9538,35 @@ class binary_reader { std::int32_t len{}; binary_t value; - return get_number(input_format_t::bson, len) && get_bson_binary(len, value) && sax->binary(value); + detail::sax_call_next_token_start_pos(sax, chars_read); + const bool result_get = get_number(input_format_t::bson, len) && get_bson_binary(len, value); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->binary(value); } case 0x08: // boolean { + detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + 1); return sax->boolean(get() != 0); } case 0x0A: // null { + detail::sax_call_next_token_start_end_pos(sax, chars_read); return sax->null(); } case 0x10: // int32 { std::int32_t value{}; - return get_number(input_format_t::bson, value) && sax->number_integer(value); + 
detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(value)); + return get_number(input_format_t::bson, value) && sax->number_integer(value); } case 0x12: // int64 { std::int64_t value{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(value)); return get_number(input_format_t::bson, value) && sax->number_integer(value); } @@ -9439,14 +9605,22 @@ class binary_reader } const std::size_t element_type_parse_position = chars_read; + if (!is_array) + { + detail::sax_call_next_token_start_pos(sax, chars_read); + } if (JSON_HEDLEY_UNLIKELY(!get_bson_cstr(key))) { return false; } - if (!is_array && !sax->key(key)) + if (!is_array) { - return false; + detail::sax_call_next_token_end_pos(sax, chars_read); + if (!sax->key(key)) + { + return false; + } } if (JSON_HEDLEY_UNLIKELY(!parse_bson_element_internal(element_type, element_type_parse_position))) @@ -9468,6 +9642,7 @@ class binary_reader bool parse_bson_array() { std::int32_t document_size{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read, chars_read + sizeof(std::int32_t)); get_number(input_format_t::bson, document_size); if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast(-1)))) @@ -9480,6 +9655,7 @@ class binary_reader return false; } + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->end_array(); } @@ -9529,29 +9705,34 @@ class binary_reader case 0x15: case 0x16: case 0x17: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->number_unsigned(static_cast(current)); case 0x18: // Unsigned integer (one-byte uint8_t follows) { std::uint8_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); } case 0x19: // Unsigned integer (two-byte uint16_t follows) { std::uint16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, 
chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); } case 0x1A: // Unsigned integer (four-byte uint32_t follows) { std::uint32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); } case 0x1B: // Unsigned integer (eight-byte uint64_t follows) { std::uint64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_unsigned(number); } @@ -9580,29 +9761,34 @@ class binary_reader case 0x35: case 0x36: case 0x37: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->number_integer(static_cast(0x20 - 1 - current)); case 0x38: // Negative integer (one-byte uint8_t follows) { std::uint8_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - number); } case 0x39: // Negative integer -1-n (two-byte uint16_t follows) { std::uint16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - number); } case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) { std::uint32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - number); } case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) { std::uint64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_integer(static_cast(-1) - 
static_cast(number)); } @@ -9639,7 +9825,10 @@ class binary_reader case 0x5F: // Binary data (indefinite length) { binary_t b; - return get_cbor_binary(b) && sax->binary(b); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_cbor_binary(b); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->binary(b); } // UTF-8 string (0x00..0x17 bytes follow) @@ -9674,7 +9863,10 @@ class binary_reader case 0x7F: // UTF-8 string (indefinite length) { string_t s; - return get_cbor_string(s) && sax->string(s); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_cbor_string(s); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->string(s); } // array (0x00..0x17 data items follow) @@ -9702,35 +9894,51 @@ class binary_reader case 0x95: case 0x96: case 0x97: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return get_cbor_array( conditional_static_cast(static_cast(current) & 0x1Fu), tag_handler); case 0x98: // array (one-byte uint8_t for n follows) { std::uint8_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_array(static_cast(len), tag_handler); } case 0x99: // array (two-byte uint16_t for n follow) { std::uint16_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_array(static_cast(len), tag_handler); } case 0x9A: // array (four-byte uint32_t for n follow) { std::uint32_t len{}; - 
return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_array(conditional_static_cast(len), tag_handler); } case 0x9B: // array (eight-byte uint64_t for n follow) { std::uint64_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_array(conditional_static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_array(conditional_static_cast(len), tag_handler); } case 0x9F: // array (indefinite length) + { + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return get_cbor_array(static_cast(-1), tag_handler); + } // map (0x00..0x17 pairs of data items follow) case 0xA0: @@ -9757,33 +9965,47 @@ class binary_reader case 0xB5: case 0xB6: case 0xB7: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return get_cbor_object(conditional_static_cast(static_cast(current) & 0x1Fu), tag_handler); case 0xB8: // map (one-byte uint8_t for n follows) { std::uint8_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_object(static_cast(len), tag_handler); } case 0xB9: // map (two-byte uint16_t for n follow) { std::uint16_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = 
get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_object(static_cast(len), tag_handler); } case 0xBA: // map (four-byte uint32_t for n follow) { std::uint32_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_object(conditional_static_cast(len), tag_handler); } case 0xBB: // map (eight-byte uint64_t for n follow) { std::uint64_t len{}; - return get_number(input_format_t::cbor, len) && get_cbor_object(conditional_static_cast(len), tag_handler); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_number(input_format_t::cbor, len); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && get_cbor_object(conditional_static_cast(len), tag_handler); } case 0xBF: // map (indefinite length) + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return get_cbor_object(static_cast(-1), tag_handler); case 0xC6: // tagged item @@ -9888,7 +10110,10 @@ class binary_reader return parse_cbor_internal(true, tag_handler); } get(); - return get_cbor_binary(b) && sax->binary(b); + detail::sax_call_next_token_start_pos(sax, chars_read); + const bool result_get = get_cbor_binary(b); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->binary(b); } default: // LCOV_EXCL_LINE @@ -9898,16 +10123,20 @@ class binary_reader } case 0xF4: // false + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(false); case 0xF5: // true + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(true); case 0xF6: // null + 
detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->null(); case 0xF9: // Half-Precision Float (two-byte IEEE 754) { + detail::sax_call_next_token_start_pos(sax, chars_read - 1); const auto byte1_raw = get(); if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(input_format_t::cbor, "number"))) { @@ -9949,6 +10178,7 @@ class binary_reader return std::ldexp(mant + 1024, exp - 25); } }(); + detail::sax_call_next_token_end_pos(sax, chars_read); return sax->number_float((half & 0x8000u) != 0 ? static_cast(-val) : static_cast(val), ""); @@ -9957,12 +10187,14 @@ class binary_reader case 0xFA: // Single-Precision Float (four-byte IEEE 754) { float number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_float(static_cast(number), ""); } case 0xFB: // Double-Precision Float (eight-byte IEEE 754) { double number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::cbor, number) && sax->number_float(static_cast(number), ""); } @@ -10206,6 +10438,7 @@ class binary_reader } } + detail::sax_call_next_token_start_end_pos(sax, chars_read); return sax->end_array(); } @@ -10231,7 +10464,10 @@ class binary_reader for (std::size_t i = 0; i < len; ++i) { get(); - if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_cbor_string(key); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -10247,7 +10483,10 @@ class binary_reader { while (get() != 0xFF) { - if (JSON_HEDLEY_UNLIKELY(!get_cbor_string(key) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_cbor_string(key); + detail::sax_call_next_token_end_pos(sax, chars_read); + if 
(JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -10261,6 +10500,7 @@ class binary_reader } } + detail::sax_call_next_token_start_end_pos(sax, chars_read); return sax->end_object(); } @@ -10408,6 +10648,7 @@ class binary_reader case 0x7D: case 0x7E: case 0x7F: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->number_unsigned(static_cast(current)); // fixmap @@ -10427,6 +10668,7 @@ class binary_reader case 0x8D: case 0x8E: case 0x8F: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return get_msgpack_object(conditional_static_cast(static_cast(current) & 0x0Fu)); // fixarray @@ -10446,6 +10688,7 @@ class binary_reader case 0x9D: case 0x9E: case 0x9F: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return get_msgpack_array(conditional_static_cast(static_cast(current) & 0x0Fu)); // fixstr @@ -10486,16 +10729,22 @@ class binary_reader case 0xDB: // str 32 { string_t s; - return get_msgpack_string(s) && sax->string(s); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_msgpack_string(s); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->string(s); } case 0xC0: // nil + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->null(); case 0xC2: // false + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(false); case 0xC3: // true + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(true); case 0xC4: // bin 8 @@ -10511,90 +10760,107 @@ class binary_reader case 0xD8: // fixext 16 { binary_t b; - return get_msgpack_binary(b) && sax->binary(b); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_msgpack_binary(b); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->binary(b); } case 0xCA: 
// float 32 { float number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast(number), ""); } case 0xCB: // float 64 { double number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_float(static_cast(number), ""); } case 0xCC: // uint 8 { std::uint8_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); } case 0xCD: // uint 16 { std::uint16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); } case 0xCE: // uint 32 { std::uint32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); } case 0xCF: // uint 64 { std::uint64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_unsigned(number); } case 0xD0: // int 8 { std::int8_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_integer(number); } case 0xD1: // int 16 { std::int16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_integer(number); } case 0xD2: // int 32 { std::int32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return 
get_number(input_format_t::msgpack, number) && sax->number_integer(number); } case 0xD3: // int 64 { std::int64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format_t::msgpack, number) && sax->number_integer(number); } case 0xDC: // array 16 { std::uint16_t len{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_array(static_cast(len)); } case 0xDD: // array 32 { std::uint32_t len{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_array(conditional_static_cast(len)); } case 0xDE: // map 16 { std::uint16_t len{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_object(static_cast(len)); } case 0xDF: // map 32 { std::uint32_t len{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(len)); return get_number(input_format_t::msgpack, len) && get_msgpack_object(conditional_static_cast(len)); } @@ -10631,6 +10897,7 @@ class binary_reader case 0xFD: case 0xFE: case 0xFF: + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->number_integer(static_cast(current)); default: // anything else @@ -10861,6 +11128,7 @@ class binary_reader } } + detail::sax_call_next_token_start_end_pos(sax, chars_read); return sax->end_array(); } @@ -10879,7 +11147,10 @@ class binary_reader for (std::size_t i = 0; i < len; ++i) { get(); - if (JSON_HEDLEY_UNLIKELY(!get_msgpack_string(key) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_msgpack_string(key); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { 
return false; } @@ -10891,6 +11162,7 @@ class binary_reader key.clear(); } + detail::sax_call_next_token_start_end_pos(sax, chars_read); return sax->end_object(); } @@ -11253,7 +11525,6 @@ class binary_reader return true; } } - string_t key = "_ArraySize_"; if (JSON_HEDLEY_UNLIKELY(!sax->start_object(3) || !sax->key(key) || !sax->start_array(dim.size()))) { @@ -11314,7 +11585,6 @@ class binary_reader bool is_ndarray = false; get_ignore_noop(); - if (current == '$') { result.second = get(); // must not ignore 'N', because 'N' maybe the type @@ -11343,7 +11613,9 @@ class binary_reader exception_message(input_format, concat("expected '#' after type information; last byte: 0x", last_token), "size"), nullptr)); } + // detail::sax_call_next_token_start_pos(sax, chars_read - 1); const bool is_error = get_ubjson_size_value(result.first, is_ndarray); + //detail::sax_call_next_token_end_pos(sax, chars_read); if (input_format == input_format_t::bjdata && is_ndarray) { if (inside_ndarray) @@ -11358,7 +11630,9 @@ class binary_reader if (current == '#') { + // detail::sax_call_next_token_start_pos(sax, chars_read - 1); const bool is_error = get_ubjson_size_value(result.first, is_ndarray); + // detail::sax_call_next_token_end_pos(sax, chars_read); if (input_format == input_format_t::bjdata && is_ndarray) { return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read, @@ -11367,6 +11641,7 @@ class binary_reader return is_error; } + // detail::sax_call_next_token_start_end_pos(sax, chars_read - 2, chars_read - 1); return true; } @@ -11382,40 +11657,47 @@ class binary_reader return unexpect_eof(input_format, "value"); case 'T': // true + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(true); case 'F': // false + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->boolean(false); case 'Z': // null + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, 
chars_read); return sax->null(); case 'U': { std::uint8_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_unsigned(number); } case 'i': { std::int8_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_integer(number); } case 'I': { std::int16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_integer(number); } case 'l': { std::int32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_integer(number); } - case 'L': { std::int64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_integer(number); } @@ -11426,6 +11708,7 @@ class binary_reader break; } std::uint16_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_unsigned(number); } @@ -11436,6 +11719,7 @@ class binary_reader break; } std::uint32_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_unsigned(number); } @@ -11446,11 +11730,13 @@ class binary_reader break; } std::uint64_t number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_unsigned(number); } case 'h': { + detail::sax_call_next_token_start_pos(sax, chars_read - 1); if (input_format != input_format_t::bjdata) { break; @@ -11496,25 +11782,30 @@ class binary_reader return std::ldexp(mant + 
1024, exp - 25); } }(); + detail::sax_call_next_token_end_pos(sax, chars_read); return sax->number_float((half & 0x8000u) != 0 ? static_cast(-val) - : static_cast(val), ""); + : static_cast(val), + ""); } case 'd': { float number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_float(static_cast(number), ""); } case 'D': { double number{}; + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read + sizeof(number)); return get_number(input_format, number) && sax->number_float(static_cast(number), ""); } case 'H': { + // call to detail::sax_call_next_token_start_end_pos inside of the method return get_ubjson_high_precision_number(); } @@ -11532,19 +11823,25 @@ class binary_reader exception_message(input_format, concat("byte after 'C' must be in range 0x00..0x7F; last byte: 0x", last_token), "char"), nullptr)); } string_t s(1, static_cast(current)); + detail::sax_call_next_token_start_end_pos(sax, chars_read - 2, chars_read); return sax->string(s); } case 'S': // string { string_t s; - return get_ubjson_string(s) && sax->string(s); + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_ubjson_string(s); + detail::sax_call_next_token_end_pos(sax, chars_read); + return result_get && sax->string(s); } case '[': // array + // call to detail::sax_call_next_token_start_end_pos inside of the method return get_ubjson_array(); case '{': // object + // call to detail::sax_call_next_token_start_end_pos inside of the method return get_ubjson_object(); default: // anything else @@ -11559,6 +11856,7 @@ class binary_reader */ bool get_ubjson_array() { + detail::sax_call_next_token_start_pos(sax, chars_read - 1); std::pair size_and_type; if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) { @@ -11583,6 +11881,7 @@ class binary_reader exception_message(input_format, "invalid byte: 0x" + last_token, "type"), 
nullptr)); } + detail::sax_call_next_token_end_pos(sax, chars_read); string_t type = it->second; // sax->string() takes a reference if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type))) { @@ -11594,6 +11893,7 @@ class binary_reader size_and_type.second = 'U'; } + detail::sax_call_next_token_start_end_pos(sax, chars_read); key = "_ArrayData_"; if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->start_array(size_and_type.first) )) { @@ -11602,17 +11902,20 @@ class binary_reader for (std::size_t i = 0; i < size_and_type.first; ++i) { + // call to detail::sax_call_next_token_start_end_pos inside of the method if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) { return false; } } + detail::sax_call_next_token_start_end_pos(sax, chars_read); return (sax->end_array() && sax->end_object()); } if (size_and_type.first != npos) { + detail::sax_call_next_token_end_pos(sax, chars_read); if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) { return false; @@ -11624,6 +11927,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { + // call to detail::sax_call_next_token_start_end_pos inside of the method if (JSON_HEDLEY_UNLIKELY(!get_ubjson_value(size_and_type.second))) { return false; @@ -11635,6 +11939,7 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { + // call to detail::sax_call_next_token_start_end_pos inside of the method if (JSON_HEDLEY_UNLIKELY(!parse_ubjson_internal())) { return false; @@ -11644,6 +11949,7 @@ class binary_reader } else { + detail::sax_call_next_token_end_pos(sax, chars_read - 1); if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast(-1)))) { return false; @@ -11659,6 +11965,7 @@ class binary_reader } } + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->end_array(); } @@ -11667,6 +11974,7 @@ class binary_reader */ bool get_ubjson_object() { + detail::sax_call_next_token_start_pos(sax, chars_read - 1); std::pair 
size_and_type; if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type))) { @@ -11684,6 +11992,7 @@ class binary_reader string_t key; if (size_and_type.first != npos) { + detail::sax_call_next_token_end_pos(sax, chars_read - 1); if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first))) { return false; @@ -11693,7 +12002,10 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_ubjson_string(key); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -11708,7 +12020,10 @@ class binary_reader { for (std::size_t i = 0; i < size_and_type.first; ++i) { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_ubjson_string(key); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -11722,6 +12037,7 @@ class binary_reader } else { + detail::sax_call_next_token_end_pos(sax, chars_read - 1); if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast(-1)))) { return false; @@ -11729,7 +12045,10 @@ class binary_reader while (current != '}') { - if (JSON_HEDLEY_UNLIKELY(!get_ubjson_string(key, false) || !sax->key(key))) + detail::sax_call_next_token_start_pos(sax, chars_read - 1); + const bool result_get = get_ubjson_string(key, false); + detail::sax_call_next_token_end_pos(sax, chars_read); + if (JSON_HEDLEY_UNLIKELY(!result_get || !sax->key(key))) { return false; } @@ -11742,6 +12061,7 @@ class binary_reader } } + detail::sax_call_next_token_start_end_pos(sax, chars_read - 1, chars_read); return sax->end_object(); } @@ -11750,6 +12070,7 @@ class binary_reader bool get_ubjson_high_precision_number() { + 
detail::sax_call_next_token_start_pos(sax, chars_read - 1); // get size of following number string std::size_t size{}; bool no_ndarray = true; @@ -11770,6 +12091,7 @@ class binary_reader } number_vector.push_back(static_cast(current)); } + detail::sax_call_next_token_end_pos(sax, chars_read); // parse number string using ia_type = decltype(detail::input_adapter(number_vector)); @@ -11967,6 +12289,7 @@ class binary_reader { if (JSON_HEDLEY_UNLIKELY(current == std::char_traits::eof())) { + detail::sax_call_next_token_end_pos(sax, chars_read); return sax->parse_error(chars_read, "", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr)); } @@ -12178,8 +12501,6 @@ class parser , m_lexer(std::move(adapter), skip_comments) , allow_exceptions(allow_exceptions_) { - // read first token - get_token(); } /*! @@ -12200,7 +12521,7 @@ class parser sax_parse_internal(&sdp); // in strict mode, input must be completely read - if (strict && (get_token() != token_type::end_of_input)) + if (strict && (get_token(&sdp) != token_type::end_of_input)) { sdp.parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -12228,7 +12549,7 @@ class parser sax_parse_internal(&sdp); // in strict mode, input must be completely read - if (strict && (get_token() != token_type::end_of_input)) + if (strict && (get_token(&sdp) != token_type::end_of_input)) { sdp.parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -12266,7 +12587,7 @@ class parser const bool result = sax_parse_internal(sax); // strict mode: next byte must be EOF - if (result && strict && (get_token() != token_type::end_of_input)) + if (result && strict && (get_token(sax) != token_type::end_of_input)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -12287,6 +12608,8 @@ class parser // value to avoid a goto (see comment where set to true) bool skip_to_state_evaluation = false; + // read first token + get_token(sax); while (true) { 
if (!skip_to_state_evaluation) @@ -12302,7 +12625,7 @@ class parser } // closing } -> we are done - if (get_token() == token_type::end_object) + if (get_token(sax) == token_type::end_object) { if (JSON_HEDLEY_UNLIKELY(!sax->end_object())) { @@ -12324,7 +12647,7 @@ class parser } // parse separator (:) - if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) + if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::name_separator)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -12335,7 +12658,7 @@ class parser states.push_back(false); // parse values - get_token(); + get_token(sax); continue; } @@ -12347,7 +12670,7 @@ class parser } // closing ] -> we are done - if (get_token() == token_type::end_array) + if (get_token(sax) == token_type::end_array) { if (JSON_HEDLEY_UNLIKELY(!sax->end_array())) { @@ -12474,10 +12797,10 @@ class parser if (states.back()) // array { // comma -> next value - if (get_token() == token_type::value_separator) + if (get_token(sax) == token_type::value_separator) { // parse a new value - get_token(); + get_token(sax); continue; } @@ -12507,10 +12830,10 @@ class parser // states.back() is false -> object // comma -> next value - if (get_token() == token_type::value_separator) + if (get_token(sax) == token_type::value_separator) { // parse key - if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::value_string)) + if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::value_string)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -12523,7 +12846,7 @@ class parser } // parse separator (:) - if (JSON_HEDLEY_UNLIKELY(get_token() != token_type::name_separator)) + if (JSON_HEDLEY_UNLIKELY(get_token(sax) != token_type::name_separator)) { return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), @@ -12531,7 +12854,7 @@ class parser } // parse values - get_token(); + get_token(sax); continue; } @@ -12559,10 +12882,19 @@ class parser } } - /// get next 
token from lexer - token_type get_token() + /// get next token from lexer and pass position info to sax (if it is accepted) + template + token_type get_token(SAX* sax) { - return last_token = m_lexer.scan(); + if (!m_lexer.scan_start()) + { + last_token = token_type::parse_error; + return token_type::parse_error; + } + detail::sax_call_next_token_start_pos(sax, m_lexer); + last_token = m_lexer.scan_end(); + detail::sax_call_next_token_end_pos(sax, m_lexer); + return last_token; } std::string exception_message(const token_type expected, const std::string& context) diff --git a/tests/src/unit-sax-parser-extended.cpp b/tests/src/unit-sax-parser-extended.cpp new file mode 100644 index 0000000000..b4e7dfa226 --- /dev/null +++ b/tests/src/unit-sax-parser-extended.cpp @@ -0,0 +1,1893 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.10.2 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2019 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include +#include +#include +#include +#include + +#include "doctest_compatibility.h" + +#include + +// ignore warning to replace if with if constexpr since there are +// several in the file, just deactivate it here to prevent repeated ifdefs +DOCTEST_MSVC_SUPPRESS_WARNING(4127) + +//option to make this test more verbose +#define verbose_out \ + if (0) \ + std::cout + +//prototype to make -Wmissing-prototypes happy +struct element_info_t; +bool operator<(const element_info_t& l, const element_info_t& r); +std::ostream& operator<<(std::ostream& out, const element_info_t& v); +std::ostream& operator<<(std::ostream& out, const std::set& v); +template +void fill_expected_sax_pos_json(SAX& sax, + const FN& element, + const nlohmann::json& part, + std::size_t& offset); +template +void fill_expected_sax_pos_bson(SAX& sax, + const FN& element, + const nlohmann::json& part, + std::size_t& offset); +template +void fill_expected_sax_pos_cbor(SAX& sax, const FN& element, const nlohmann::json& part); +template +void fill_expected_sax_pos_msgpack(SAX& sax, const FN& element, const nlohmann::json& part); +template +void fill_expected_sax_pos_ubjson(SAX& sax, const FN& element, const nlohmann::json& part); +void test_json(nlohmann::json& json); + +//implementation + +struct element_info_t +{ + element_info_t(std::size_t idx, std::size_t first, std::size_t last) + : index{idx} + , start{first} + , end{last} + {} + std::size_t index = 0; + std::size_t start = 0; + std::size_t end = 0; +}; +bool operator<(const element_info_t& l, const element_info_t& r) +{ + return std::tie(l.index, l.start, l.end) < std::tie(r.index, r.start, r.end); +} +std::ostream& operator<<(std::ostream& out, const element_info_t& v) +{ + 
return (out << v.index << ':' << v.start << '-' << v.end + << '(' << v.end - v.start << ')'); +} +std::ostream& operator<<(std::ostream& out, const std::set& v) +{ + out << "{"; + if (v.size() > 32) + { + out << ">32 elements..."; + } + else + { + for (const auto& e : v) + { + out << ' ' << e; + } + } + out << " }"; + return out; +} + +template +struct Sax +{ + static constexpr bool has_callback = WithPos || (WithLex && !LexCallImpossible); + using json = nlohmann::json; + + enum class last_call_t + { + element, + start_pos, + end_pos + }; + + last_call_t last_call = last_call_t::element; + + element_info_t se{0, 0, 0}; + + std::set pos_null{}; + std::set pos_boolean{}; + std::set pos_number_integer{}; + std::set pos_number_unsigned{}; + std::set pos_number_float{}; + std::set pos_string{}; + std::set pos_binary{}; + std::set pos_start_object{}; + std::set pos_key{}; + std::set pos_end_object{}; + std::set pos_start_array{}; + std::set pos_end_array{}; + + void check_call(std::set& set, const char* fnname) + { + INFO("function " << fnname << ": " << se + << " (options = " << set << ')'); + if (has_callback) + { + CHECK(set.count(se) == 1); + CHECK(last_call == last_call_t::end_pos); + } + last_call = last_call_t::element; + set.erase(se); + ++se.index; + } + void check_start(std::size_t pos) + { + INFO("set start pos " << pos); + CHECK((last_call == last_call_t::element || last_call == last_call_t::end_pos)); + se.start = pos; + last_call = last_call_t::start_pos; + } + void check_end(std::size_t pos) + { + INFO("set end pos " << pos); + CHECK(last_call == last_call_t::start_pos); + se.end = pos; + last_call = last_call_t::end_pos; + } + + template + typename std::enable_if::type next_token_start(std::size_t pos) + { + check_start(pos); + CHECK((!WithLex || LexCallImpossible)); + } + + template < class LexT, bool Act = WithLex && !std::is_same::value > + typename std::enable_if::type next_token_start(const LexT& lex) + { + 
check_start(lex.get_position().chars_read_total - 1); + CHECK(WithLex); + } + + template + typename std::enable_if::type next_token_end(std::size_t pos) + { + check_end(pos); + CHECK((!WithLex || LexCallImpossible)); + } + + template < class LexT, bool Act = WithLex && !std::is_same::value > + typename std::enable_if::type next_token_end(const LexT& lex) + { + check_end(lex.get_position().chars_read_total); + CHECK(WithLex); + } + + bool null() + { + check_call(pos_null, __func__); + verbose_out << "got null\n"; + return true; + } + bool boolean(bool val) + { + check_call(pos_boolean, __func__); + verbose_out << "got boolean " << val << "\n"; + return true; + } + bool number_integer(json::number_integer_t val) + { + check_call(pos_number_integer, __func__); + verbose_out << "got number_integer " << val << "\n"; + return true; + } + bool number_unsigned(json::number_unsigned_t val) + { + check_call(pos_number_unsigned, __func__); + verbose_out << "got number_unsigned " << val << "\n"; + return true; + } + bool number_float(json::number_float_t val, const std::string& str) + { + check_call(pos_number_float, __func__); + verbose_out << "got float " << val << " (" << str << ")" + << "\n"; + return true; + } + bool string(std::string& val) + { + check_call(pos_string, __func__); + verbose_out << "got string " << val << "\n"; + return true; + } + bool binary(std::vector& val) + { + check_call(pos_binary, __func__); + verbose_out << "got binary: size " << val.size() << "\n"; + return true; + } + bool start_object(std::size_t val) + { + check_call(pos_start_object, __func__); + verbose_out << "got start_object: size " << val << "\n"; + return true; + } + bool key(std::string& val) + { + check_call(pos_key, __func__); + verbose_out << "got key " << val << "\n"; + return true; + } + bool end_object() + { + check_call(pos_end_object, __func__); + verbose_out << "got end_object\n"; + return true; + } + bool start_array(std::size_t val) + { + check_call(pos_start_array, 
__func__); + verbose_out << "got start_array: size " << val << "\n"; + return true; + } + bool end_array() + { + check_call(pos_end_array, __func__); + verbose_out << "got end_array\n"; + return true; + } + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const json::exception& /*unused*/) // NOLINT(readability-convert-member-functions-to-static) + { + std::cout << "got parse_error\n"; + CHECK(false); // should not happen + return false; + } + void check_all_pos_found() + { + INFO("check all null were found (elements left: " << pos_null << ')'); + CHECK(pos_null.empty()); + INFO("check all boolean were found (elements left: " << pos_boolean << ')'); + CHECK(pos_boolean.empty()); + INFO("check all number_integer were found (elements left: " << pos_number_integer << ')'); + CHECK(pos_number_integer.empty()); + INFO("check all number_unsigned were found (elements left: " << pos_number_unsigned << ')'); + CHECK(pos_number_unsigned.empty()); + INFO("check all number_float were found (elements left: " << pos_number_float << ')'); + CHECK(pos_number_float.empty()); + INFO("check all string were found (elements left: " << pos_string << ')'); + CHECK(pos_string.empty()); + INFO("check all binary were found (elements left: " << pos_binary << ')'); + CHECK(pos_binary.empty()); + INFO("check all start_object were found (elements left: " << pos_start_object << ')'); + CHECK(pos_start_object.empty()); + INFO("check all key were found (elements left: " << pos_key << ')'); + CHECK(pos_key.empty()); + INFO("check all end_object were found (elements left: " << pos_end_object << ')'); + CHECK(pos_end_object.empty()); + INFO("check all start_array were found (elements left: " << pos_start_array << ')'); + CHECK(pos_start_array.empty()); + INFO("check all end_array were found (elements left: " << pos_end_array << ')'); + CHECK(pos_end_array.empty()); + } +}; + +template +struct Opt +{ + static constexpr bool WithPos = WithPosV; + static constexpr bool WithLex = 
WithLexV; +}; + +using OptNone = Opt; +using OptLex = Opt; +using OptPos = Opt; +using OptBoth = Opt; + +//test basic functionality +TEST_CASE_TEMPLATE("extended parser", T, OptNone, OptLex, OptPos, OptBoth) +{ + const bool with_pos = T::WithPos; + const bool with_lex = T::WithLex; + + INFO("WithPos " << with_pos << ", WithLex " << with_lex); + //element count 0 1 2 3 4 5 6 7 8 9 10 + //index 10s place 0 1 2 3 4 5 + //index 1s place 012345678901234567890123456789012345678901234567890123 + const std::string str = R"({ "array" : [14294967296,-1,true,4.2,null,"str" ] })"; + std::size_t elem_idx = 0; + std::size_t char_idx = 0; + const auto element = [&](std::size_t bytes) + { + const auto start = char_idx; + char_idx += bytes; + return element_info_t{elem_idx++, start, char_idx}; + }; + const auto skip = [&](std::size_t bytes) + { + char_idx += bytes; + }; + SECTION("json") + { + std::string reconstructed; + const auto elementFromStr = [&](const std::string & s) + { + reconstructed += s; + return element(s.size()); + }; + const auto skipFromStr = [&](const std::string & s) + { + reconstructed += s; + skip(s.size()); + }; + Sax sax; + sax.pos_start_object.emplace(elementFromStr("{")); + skipFromStr(" "); + sax.pos_key.emplace(elementFromStr(R"("array")")); + skipFromStr(" : "); + sax.pos_start_array.emplace(elementFromStr("[")); + sax.pos_number_unsigned.emplace(elementFromStr("14294967296")); + skipFromStr(","); + sax.pos_number_integer.emplace(elementFromStr("-1")); + skipFromStr(","); + sax.pos_boolean.emplace(elementFromStr("true")); + skipFromStr(","); + sax.pos_number_float.emplace(elementFromStr("4.2")); + skipFromStr(","); + sax.pos_null.emplace(elementFromStr("null")); + skipFromStr(","); + sax.pos_string.emplace(elementFromStr(R"("str")")); + skipFromStr(" "); + sax.pos_end_array.emplace(elementFromStr("]")); + skipFromStr(" "); + sax.pos_end_object.emplace(elementFromStr("}")); + CHECK(nlohmann::json::sax_parse(str, &sax, 
nlohmann::json::input_format_t::json)); + if (with_pos || with_lex) + { + sax.check_all_pos_found(); + } + CHECK(char_idx == str.size()); + CHECK(str == reconstructed); + } + SECTION("bson") + { + const auto j = nlohmann::json::parse(str); + const auto bin = nlohmann::json::to_bson(j); + Sax sax; + sax.pos_start_object.emplace(element(4)); //4 bytes size + skip(1); //one byte type array + sax.pos_key.emplace(element(6)); //6 key (array\0) + sax.pos_start_array.emplace(element(4)); //4 bytes size + skip(3); //one byte type + key 0\0 + sax.pos_number_integer.emplace(element(8)); //8 bytes int64 + skip(3); //one byte type + key 1\0 + sax.pos_number_integer.emplace(element(4)); //4 bytes int32 + skip(3); //one byte type + key 2\0 + sax.pos_boolean.emplace(element(1)); //1 byte bool + skip(3); //one byte type + key 3\0 + sax.pos_number_float.emplace(element(8)); //8 bytes double + skip(3); //one byte type + key 4\0 + sax.pos_null.emplace(element((0))); //0 bytes + skip(3); //one byte type + key 4\0 + sax.pos_string.emplace(element(8)); //4 bytes size + (str\0) + sax.pos_end_array.emplace(element(1)); //1 byte \0 end of array + sax.pos_end_object.emplace(element(1)); //1 byte \0 end of object + CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, nlohmann::json::input_format_t::bson)); + if (with_pos) + { + sax.check_all_pos_found(); + } + } + SECTION("cbor") + { + const auto j = nlohmann::json::parse(str); + const auto bin = nlohmann::json::to_cbor(j); + Sax sax; + sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type) + sax.pos_key.emplace(element(6)); //1 byte type + 5 bytes string (array) (size implicit) + sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type) + sax.pos_number_unsigned.emplace(element(9)); //1 byte type + 8 bytes uint64 + sax.pos_number_integer.emplace(element(1)); //1 byte type + 0 bytes int -> implicit value since small + sax.pos_boolean.emplace(element(1)); 
//1 byte type + 0 byte bool (value in type) + sax.pos_number_float.emplace(element(9)); //1 byte type + 8 bytes double + sax.pos_null.emplace(element((1))); //1 byte type + 0 bytes + sax.pos_string.emplace(element(4)); //1 byte type + 3 bytes string (str) (size implicit) + sax.pos_end_array.emplace(element(0)); //0 byte end of array + sax.pos_end_object.emplace(element(0)); //0 byte end of object + CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, nlohmann::json::input_format_t::cbor)); + if (with_pos) + { + sax.check_all_pos_found(); + } + } + SECTION("msgpack") + { + const auto j = nlohmann::json::parse(str); + const auto bin = nlohmann::json::to_msgpack(j); + Sax sax; + sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size + sax.pos_key.emplace(element(6)); //1 byte type + 5 bytes string (array) (size implicit) + sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type) + sax.pos_number_unsigned.emplace(element(9)); //1 byte type + 8 bytes uint64 + sax.pos_number_integer.emplace(element(1)); //1 byte type + 0 bytes int -> implicit value since small + sax.pos_boolean.emplace(element(1)); //1 byte type + 0 byte bool (value in type) + sax.pos_number_float.emplace(element(9)); //1 byte type + 8 bytes double + sax.pos_null.emplace(element((1))); //1 byte type + 0 bytes + sax.pos_string.emplace(element(4)); //1 byte type + 3 bytes string (str) (size implicit) + sax.pos_end_array.emplace(element(0)); //0 byte end of array + sax.pos_end_object.emplace(element(0)); //0 byte end of object + CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, nlohmann::json::input_format_t::msgpack)); + if (with_pos) + { + sax.check_all_pos_found(); + } + } + SECTION("ubjson") + { + const auto j = nlohmann::json::parse(str); + const auto bin = nlohmann::json::to_ubjson(j); + Sax sax; + sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size + sax.pos_key.emplace(element(7)); //1 
byte type + 6 bytes string (array\0) + sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type) + sax.pos_number_integer.emplace(element(9)); //1 byte type + 8 bytes uint64 + sax.pos_number_integer.emplace(element(2)); //1 byte type + 1 bytes int8 + sax.pos_boolean.emplace(element(1)); //1 byte type + 0 byte bool (value in type) + sax.pos_number_float.emplace(element(9)); //1 byte type + 8 bytes double + sax.pos_null.emplace(element((1))); //1 byte type + 0 bytes + sax.pos_string.emplace(element(6)); //1 type + 1 type of len + 1 len +3 string (str) + sax.pos_end_array.emplace(element(1)); //1 byte type + 0 byte end of array + sax.pos_end_object.emplace(element(1)); //1 byte type + 0 byte end of object + CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, nlohmann::json::input_format_t::ubjson)); + if (with_pos) + { + sax.check_all_pos_found(); + } + } + SECTION("bjdata") + { + const auto j = nlohmann::json::parse(str); + const auto bin = nlohmann::json::to_bjdata(j); + Sax sax; + sax.pos_start_object.emplace(element(1)); //1 byte type + 0 bytes size + sax.pos_key.emplace(element(7)); //1 byte type + 6 bytes string (array\0) + sax.pos_start_array.emplace(element(1)); //1 byte type + 0 bytes size (implicit in type) + sax.pos_number_integer.emplace(element(9)); //1 byte type + 8 bytes uint64 + sax.pos_number_integer.emplace(element(2)); //1 byte type + 1 bytes int8 + sax.pos_boolean.emplace(element(1)); //1 byte type + 0 byte bool (value in type) + sax.pos_number_float.emplace(element(9)); //1 byte type + 8 bytes double + sax.pos_null.emplace(element((1))); //1 byte type + 0 bytes + sax.pos_string.emplace(element(6)); //1 type + 1 type of len + 1 len +3 string (str) + sax.pos_end_array.emplace(element(1)); //1 byte type + 0 byte end of array + sax.pos_end_object.emplace(element(1)); //1 byte type + 0 byte end of object + CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, 
nlohmann::json::input_format_t::bjdata)); + if (with_pos) + { + sax.check_all_pos_found(); + } + } +} + +//cover more advanced cases (e.g. msgpack fixint) (but only use one templated version) +template +void fill_expected_sax_pos_json(SAX& sax, + const FN& element, + const nlohmann::json& part, + std::size_t& offset) +{ + switch (part.type()) + { + case nlohmann::json::value_t::null: + { + sax.pos_null.emplace(element(4)); //null + } + break; + case nlohmann::json::value_t::object: + { + sax.pos_start_object.emplace(element(1)); // { + for (auto& el : part.items()) + { + sax.pos_key.emplace(element(el.key().size() + 2)); //'"' + str + '"' + offset += 1; // separator ':' between key and value + fill_expected_sax_pos_json(sax, element, el.value(), offset); + offset += 1; // add , + } + if (!part.empty()) + { + offset -= 1; // remove last , + } + sax.pos_end_object.emplace(element(1)); // } + } + break; + case nlohmann::json::value_t::array: + { + sax.pos_start_array.emplace(element(1)); // [ + for (auto& el : part.items()) + { + fill_expected_sax_pos_json(sax, element, el.value(), offset); + offset += 1; // add , + } + if (!part.empty()) + { + offset -= 1; // remove last , + } + sax.pos_end_array.emplace(element(1)); // ] + } + break; + case nlohmann::json::value_t::string: + { + const auto val = part.get(); + std::size_t nbytes = val.size() + 2; //'"' + value + '"' + sax.pos_string.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::boolean: + { + const auto val = part.get(); + if (val) + { + sax.pos_boolean.emplace(element(4)); // true + } + else + { + sax.pos_boolean.emplace(element(5)); // false + } + } + break; + case nlohmann::json::value_t::number_integer: + { + const auto val = part.get(); + std::size_t nbytes = std::to_string(val).size(); + sax.pos_number_integer.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_unsigned: + { + const auto val = part.get(); + std::size_t nbytes = std::to_string(val).size(); + 
sax.pos_number_unsigned.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_float: + { + const auto val = part.get(); + std::size_t nbytes = std::to_string(val).size(); + sax.pos_number_float.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::binary: + { + //stored as object with array and subtype + nlohmann::json sub; + sub["bytes"] = nlohmann::json::array(); + for (const auto e : part.get_binary()) + { + sub["bytes"].emplace_back(e); + } + sub["subtype"]; + fill_expected_sax_pos_json(sax, element, sub, offset); + } + break; + case nlohmann::json::value_t::discarded: + { + std::cout << "unexpected! value_t::discarded\n"; + throw std::logic_error{"unexpected! value_t::discarded"}; + } + break; + default: + throw std::logic_error{"unexpected! default"}; + } +} + +template +void fill_expected_sax_pos_bson(SAX& sax, + const FN& element, + const nlohmann::json& part, + std::size_t& offset) +{ + switch (part.type()) + { + case nlohmann::json::value_t::null: + { + //type is before the key -> not included + sax.pos_null.emplace(element(0)); + } + break; + case nlohmann::json::value_t::object: + { + sax.pos_start_object.emplace(element(4)); //32 bit size + for (auto& el : part.items()) + { + offset += 1; // type of item + sax.pos_key.emplace(element(el.key().size() + 1)); // str + terminator + fill_expected_sax_pos_bson(sax, element, el.value(), offset); + } + sax.pos_end_object.emplace(element(1)); // \0 terminator + } + break; + case nlohmann::json::value_t::array: + { + sax.pos_start_array.emplace(element(4)); //32 bit size + std::size_t i = 0; + for (auto& el : part.items()) + { + offset += 1; // type of item + offset += 1 + std::to_string(i).size(); // dummy key + terminator + fill_expected_sax_pos_bson(sax, element, el.value(), offset); + ++i; + } + sax.pos_end_array.emplace(element(1)); // \0 terminator + } + break; + case nlohmann::json::value_t::string: + { + //type is before the key -> not included + std::size_t 
nbytes = 4; //size + const auto val = part.get(); + nbytes += val.size() + 1; //value + \0 terminate + sax.pos_string.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::boolean: + { + //type is before the key -> not included + std::size_t nbytes = 1; //value + sax.pos_boolean.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_integer: + { + std::size_t nbytes = 0; //type is before the key -> not included + const auto val = part.get(); + //for <-24 : -n-1 + if (val >= 0) + { + std::cout << "unexpected int >= 0\n"; + throw std::logic_error{"unexpected int >= 0"}; + } + if (val >= static_cast(std::numeric_limits::min())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_number_integer.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_unsigned: + { + std::size_t nbytes = 0; //type is before the key -> not included + const auto val = part.get(); + if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_number_integer.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_float: + { + std::size_t nbytes = 0; //type is before the key -> not included + nbytes += 8; //value + sax.pos_number_float.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::binary: + { + std::size_t nbytes = 0; //type is before the key -> not included + nbytes += 4; // length of bin (32 bit) + nbytes += 1; // subtype + nbytes += part.get_binary().size(); + sax.pos_binary.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::discarded: + { + std::cout << "unexpected! value_t::discarded\n"; + throw std::logic_error{"unexpected! value_t::discarded"}; + } + break; + default: + throw std::logic_error{"unexpected! 
default"}; + } +} + +template +void fill_expected_sax_pos_cbor(SAX& sax, const FN& element, const nlohmann::json& part) +{ + switch (part.type()) + { + case nlohmann::json::value_t::null: + { + std::size_t nbytes = 1; //type + sax.pos_null.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::object: + { + std::size_t nbytes = 1; //type + if (part.size() <= 0x17) + { + //size implicit in type + } + else if (part.size() <= std::numeric_limits::max()) + { + nbytes += 1; + } + else if (part.size() <= std::numeric_limits::max()) + { + nbytes += 2; + } + else if (part.size() <= std::numeric_limits::max()) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_start_object.emplace(element(nbytes)); + //key follows same rules as string + for (auto& el : part.items()) + { + std::size_t nbyteskey = 1; //type + nbyteskey += el.key().size(); + if (el.key().size() <= 0x17) + { + //size implicit in type + } + else if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 1; + } + else if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 2; + } + else if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 4; + } + else + { + nbyteskey += 8; + } + sax.pos_key.emplace(element(nbyteskey)); + fill_expected_sax_pos_cbor(sax, element, el.value()); + } + sax.pos_end_object.emplace(element(0)); + } + break; + case nlohmann::json::value_t::array: + { + std::size_t nbytes = 1; //type + if (part.size() <= 0x17) + { + //size implicit in type + } + else if (part.size() <= std::numeric_limits::max()) + { + nbytes += 1; + } + else if (part.size() <= std::numeric_limits::max()) + { + nbytes += 2; + } + else if (part.size() <= std::numeric_limits::max()) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_start_array.emplace(element(nbytes)); + //add elements + for (const auto& elem : part) + { + fill_expected_sax_pos_cbor(sax, element, elem); + } + sax.pos_end_array.emplace(element(0)); + } + break; + case 
nlohmann::json::value_t::string: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + nbytes += val.size(); + if (val.size() <= static_cast(0x17)) + { + //size implicit in type + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_string.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::boolean: + { + std::size_t nbytes = 1; //type + sax.pos_boolean.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_integer: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + //for <-24 : -n-1 + if (val >= 0) + { + std::cout << "unexpected int >= 0\n"; + throw std::logic_error{"unexpected int >= 0"}; + } + if (val >= -24) + { + //value implicit in type + } + else if (-val - 1 <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (-val - 1 <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (-val - 1 <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_number_integer.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_unsigned: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + if (val <= static_cast(0x17)) + { + //value implicit in type + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_number_unsigned.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_float: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + //really 
depends on the input type + if (val < 0) + { + std::cout << "unexpected float <0\n"; + throw std::logic_error{"unexpected float <0"}; + } + if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; //float + } + else + { + nbytes += 8; //double float + } + sax.pos_number_float.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::binary: + { + std::size_t nbytes = 1; //type + const auto& val = part.get_binary(); + nbytes += val.size(); + if (val.size() <= static_cast(0x17)) + { + //size implicit in type + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_binary.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::discarded: + { + std::cout << "unexpected! value_t::discarded\n"; + throw std::logic_error{"unexpected! value_t::discarded"}; + } + break; + default: + throw std::logic_error{"unexpected! 
default"}; + } +} + +template +void fill_expected_sax_pos_msgpack(SAX& sax, const FN& element, const nlohmann::json& part) +{ + switch (part.type()) + { + case nlohmann::json::value_t::null: + { + std::size_t nbytes = 1; //type + sax.pos_null.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::object: + { + std::size_t nbytes = 1; //type + if (part.size() <= 0x0F) + { + //size implicit in type + } + else if (part.size() <= std::numeric_limits::max()) + { + nbytes += 2; + } + else if (part.size() <= std::numeric_limits::max()) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_start_object.emplace(element(nbytes)); + //key follows same rules as string + for (auto& el : part.items()) + { + std::size_t nbyteskey = 1; //type + nbyteskey += el.key().size(); + if (el.key().size() <= 0x1F) + { + //size implicit in type + } + else if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 1; + } + else if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 2; + } + else if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 4; + } + else + { + nbyteskey += 8; + } + sax.pos_key.emplace(element(nbyteskey)); + fill_expected_sax_pos_msgpack(sax, element, el.value()); + } + sax.pos_end_object.emplace(element(0)); + } + break; + case nlohmann::json::value_t::array: + { + std::size_t nbytes = 1; //type + if (part.size() <= 0x0F) + { + //size implicit in type + } + else if (part.size() <= std::numeric_limits::max()) + { + nbytes += 2; + } + else if (part.size() <= std::numeric_limits::max()) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_start_array.emplace(element(nbytes)); + //add elements + for (const auto& elem : part) + { + fill_expected_sax_pos_msgpack(sax, element, elem); + } + sax.pos_end_array.emplace(element(0)); + } + break; + case nlohmann::json::value_t::string: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + nbytes += val.size(); + if (val.size() <= 
static_cast(0x1F)) + { + //size implicit in type + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_string.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::boolean: + { + std::size_t nbytes = 1; //type + sax.pos_boolean.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_integer: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + //for <-24 : -n-1 + if (val >= 0) + { + std::cout << "unexpected int >= 0\n"; + throw std::logic_error{"unexpected int >= 0"}; + } + if (val >= -32) + { + //value implicit in type + } + else if (val >= static_cast(std::numeric_limits::min())) + { + nbytes += 1; + } + else if (val >= static_cast(std::numeric_limits::min())) + { + nbytes += 2; + } + else if (val >= static_cast(std::numeric_limits::min())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_number_integer.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_unsigned: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + if (val <= static_cast(0x7F)) + { + //value implicit in type + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_number_unsigned.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_float: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + //really depends on the input type + if (val < 0) + { + std::cout << "unexpected float <0\n"; + throw std::logic_error{"unexpected float <0"}; + } + if (val <= 
static_cast(std::numeric_limits::max())) + { + nbytes += 4; //float + } + else + { + nbytes += 8; //double float + } + sax.pos_number_float.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::binary: + { + std::size_t nbytes = 1; //type + const auto& val = part.get_binary(); + nbytes += val.size(); + if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_binary.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::discarded: + { + std::cout << "unexpected! value_t::discarded\n"; + throw std::logic_error{"unexpected! value_t::discarded"}; + } + break; + default: + throw std::logic_error{"unexpected! default"}; + } +} + +template +void fill_expected_sax_pos_ubjson(SAX& sax, const FN& element, const nlohmann::json& part) +{ + switch (part.type()) + { + case nlohmann::json::value_t::null: + { + std::size_t nbytes = 1; //type + sax.pos_null.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::object: + { + sax.pos_start_object.emplace(element(1)); + //key follows same rules as string + for (auto& el : part.items()) + { + std::size_t nbyteskey = 1; //type of len + nbyteskey += el.key().size(); + if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 1; // size of len + } + else if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 2; // size of len + } + else if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 4; // size of len + } + else + { + nbyteskey += 8; // size of len + } + sax.pos_key.emplace(element(nbyteskey)); + fill_expected_sax_pos_ubjson(sax, element, el.value()); + } + sax.pos_end_object.emplace(element(1)); + } + break; + case nlohmann::json::value_t::array: + { + sax.pos_start_array.emplace(element(1)); + //add 
elements + for (const auto& elem : part) + { + fill_expected_sax_pos_ubjson(sax, element, elem); + } + sax.pos_end_array.emplace(element(1)); + } + break; + case nlohmann::json::value_t::string: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + nbytes += val.size(); + nbytes += 1; // type of length + if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_string.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::boolean: + { + std::size_t nbytes = 1; //type + sax.pos_boolean.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_integer: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + //for <-24 : -n-1 + if (val >= 0) + { + std::cout << "unexpected int >= 0\n"; + throw std::logic_error{"unexpected int >= 0"}; + } + if (val >= static_cast(std::numeric_limits::min())) + { + nbytes += 1; + } + else if (val >= static_cast(std::numeric_limits::min())) + { + nbytes += 2; + } + else if (val >= static_cast(std::numeric_limits::min())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_number_integer.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_unsigned: + { + //supported integer types : + // uint8 + // int8/16/32/64/High precision + // --> only 128-255 are stored as uint + high precision > max int64 + bool use_uint = false; + std::size_t nbytes = 1; //type + const auto val = part.get(); + if (val < 128) + { + ++nbytes; + } + else if (val >= 128 && val <= 255) + { + use_uint = true; + ++nbytes; + } + else + { + //sorted as signed int! 
+ if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 8; + } + else + { + //High precision + //more complex calculation of size is not done here + //the size includes + // type (high precision) + // type of size of value length + // size of value length + // value as array of chars + //in this case + nbytes = 22; + if (val > std::numeric_limits::max() - 128) + { + //in this test case the value needs one more char + nbytes += 1; + } + if (val > static_cast(std::numeric_limits::max())) + { + use_uint = true; + } + } + } + if (use_uint) + { + sax.pos_number_unsigned.emplace(element(nbytes)); + } + else + { + sax.pos_number_integer.emplace(element(nbytes)); + } + } + break; + case nlohmann::json::value_t::number_float: + { + //everything is serialized as double (type+double value) + sax.pos_number_float.emplace(element(8 + 1)); + } + break; + case nlohmann::json::value_t::binary: + { + // Note, no reader for UBJSON binary types is implemented because they do + auto sub = nlohmann::json::array(); + for (const auto i : part.get_binary()) + { + sub.emplace_back(i); + } + fill_expected_sax_pos_ubjson(sax, element, sub); + } + break; + case nlohmann::json::value_t::discarded: + { + std::cout << "unexpected! value_t::discarded\n"; + throw std::logic_error{"unexpected! value_t::discarded"}; + } + break; + default: + throw std::logic_error{"unexpected! 
default"}; + } +} + +template +void fill_expected_sax_pos_bjdata(SAX& sax, const FN& element, const nlohmann::json& part) +{ + switch (part.type()) + { + case nlohmann::json::value_t::null: + { + std::size_t nbytes = 1; //type + sax.pos_null.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::object: + { + sax.pos_start_object.emplace(element(1)); + //key follows same rules as string + for (auto& el : part.items()) + { + std::size_t nbyteskey = 1; //type of len + nbyteskey += el.key().size(); + if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 1; // size of len + } + else if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 2; // size of len + } + else if (el.key().size() <= std::numeric_limits::max()) + { + nbyteskey += 4; // size of len + } + else + { + nbyteskey += 8; // size of len + } + sax.pos_key.emplace(element(nbyteskey)); + fill_expected_sax_pos_bjdata(sax, element, el.value()); + } + sax.pos_end_object.emplace(element(1)); + } + break; + case nlohmann::json::value_t::array: + { + sax.pos_start_array.emplace(element(1)); + //add elements + for (const auto& elem : part) + { + fill_expected_sax_pos_bjdata(sax, element, elem); + } + sax.pos_end_array.emplace(element(1)); + } + break; + case nlohmann::json::value_t::string: + { + std::size_t nbytes = 1; //type + const auto val = part.get(); + nbytes += val.size(); + nbytes += 1; // type of length + if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (val.size() <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_string.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::boolean: + { + std::size_t nbytes = 1; //type + sax.pos_boolean.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_integer: + { + std::size_t nbytes = 1; //type 
+ const auto val = part.get(); + if (val >= 0) + { + std::cout << "unexpected int >= 0\n"; + throw std::logic_error{"unexpected int >= 0"}; + } + if (val >= static_cast(std::numeric_limits::min())) + { + nbytes += 1; + } + else if (val >= static_cast(std::numeric_limits::min())) + { + nbytes += 2; + } + else if (val >= static_cast(std::numeric_limits::min())) + { + nbytes += 4; + } + else + { + nbytes += 8; + } + sax.pos_number_integer.emplace(element(nbytes)); + } + break; + case nlohmann::json::value_t::number_unsigned: + { + auto* category = &sax.pos_number_unsigned; + std::size_t nbytes = 1; //type + const auto val = part.get(); + if (val <= static_cast(std::numeric_limits::max())) + { + //the serializer uses int8 for these values + category = &sax.pos_number_integer; + nbytes += 1; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 1; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + //the serializer uses int6 for these values + category = &sax.pos_number_integer; + nbytes += 2; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 2; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + //the serializer uses int32 for these values + category = &sax.pos_number_integer; + nbytes += 4; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + nbytes += 4; + } + else if (val <= static_cast(std::numeric_limits::max())) + { + //the serializer uses int64 for these values + category = &sax.pos_number_integer; + nbytes += 8; + } + else + { + nbytes += 8; + } + category->emplace(element(nbytes)); + ////supported integer types : + //// uint8, uint16, uint32, uint64 + //bool use_uint = false; + //std::size_t nbytes = 1; //type + //const auto val = part.get(); + //if (val < 128) + //{ + // ++nbytes; + //} + //else if (val >= 128 && val <= 255) + //{ + // use_uint = true; + // ++nbytes; + //} + //else + //{ + // //sorted as signed int! 
+ // if (val <= static_cast(std::numeric_limits::max())) + // { + // nbytes += 1; + // } + // else if (val <= static_cast(std::numeric_limits::max())) + // { + // nbytes += 2; + // } + // else if (val <= static_cast(std::numeric_limits::max())) + // { + // nbytes += 4; + // } + // else if (val <= static_cast(std::numeric_limits::max())) + // { + // nbytes += 8; + // } + // else + // { + // //High precision + // //more complex calculation of size is not done here + // //the size includes + // // type (high precision) + // // type of size of value length + // // size of value length + // // value as array of chars + // //in this case + // nbytes = 22; + // if (val > std::numeric_limits::max() - 128) + // { + // //in this test case the value needs one more char + // nbytes += 1; + // } + // if (val > static_cast(std::numeric_limits::max())) + // { + // use_uint = true; + // } + // } + //} + //if (use_uint) + //{ + // sax.pos_number_unsigned.emplace(element(nbytes)); + //} + //else + //{ + // sax.pos_number_integer.emplace(element(nbytes)); + //} + } + break; + case nlohmann::json::value_t::number_float: + { + //everything is serialized as double (type+double value) + sax.pos_number_float.emplace(element(8 + 1)); + } + break; + case nlohmann::json::value_t::binary: + { + // Note, no reader for UBJSON binary types is implemented because they do + auto sub = nlohmann::json::array(); + for (const auto i : part.get_binary()) + { + sub.emplace_back(i); + } + fill_expected_sax_pos_ubjson(sax, element, sub); + } + break; + case nlohmann::json::value_t::discarded: + { + std::cout << "unexpected! value_t::discarded\n"; + throw std::logic_error{"unexpected! value_t::discarded"}; + } + break; + default: + throw std::logic_error{"unexpected! 
default"}; + } +} + +void test_json(nlohmann::json& json) +{ + Sax sax; + std::size_t elem_idx = 0; + std::size_t char_idx = 0; + const auto element = [&](std::size_t bytes) + { + const auto start = char_idx; + char_idx += bytes; + return element_info_t{elem_idx++, start, char_idx}; + }; + SECTION("json") + { + const auto bin = json.dump(); + std::cout << "json has size of " << bin.size() << '\n'; + fill_expected_sax_pos_json(sax, element, json, char_idx); + CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, nlohmann::json::input_format_t::json)); + sax.check_all_pos_found(); + } + SECTION("bson") + { + //since bson can't deal with values > int64 max we need to remove some + if (json.contains("uints")) + { + auto& ar = json["uints"]; + const std::uint64_t limit = std::numeric_limits::max(); + while (ar.back() > limit) + { + ar.erase(ar.size() - 1); + } + } + const auto bin = nlohmann::json::to_bson(json); + std::cout << "bson has size of " << bin.size() << '\n'; + fill_expected_sax_pos_bson(sax, element, json, char_idx); + CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, nlohmann::json::input_format_t::bson)); + sax.check_all_pos_found(); + } + SECTION("cbor") + { + const auto bin = nlohmann::json::to_cbor(json); + std::cout << "cbor has size of " << bin.size() << '\n'; + fill_expected_sax_pos_cbor(sax, element, json); + CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, nlohmann::json::input_format_t::cbor)); + sax.check_all_pos_found(); + } + SECTION("msgpack") + { + const auto bin = nlohmann::json::to_msgpack(json); + std::cout << "msgpack has size of " << bin.size() << '\n'; + fill_expected_sax_pos_msgpack(sax, element, json); + CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, nlohmann::json::input_format_t::msgpack)); + sax.check_all_pos_found(); + } + SECTION("ubjson") + { + const auto bin = nlohmann::json::to_ubjson(json); + std::cout << "ubjson has size 
of " << bin.size() << '\n'; + fill_expected_sax_pos_ubjson(sax, element, json); + CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, nlohmann::json::input_format_t::ubjson)); + sax.check_all_pos_found(); + } + SECTION("bjdata") + { + const auto bin = nlohmann::json::to_bjdata(json); + std::cout << "bjdata has size of " << bin.size() << '\n'; + fill_expected_sax_pos_bjdata(sax, element, json); + //CHECK(char_idx == bin.size()); + CHECK(nlohmann::json::sax_parse(bin, &sax, nlohmann::json::input_format_t::bjdata)); + sax.check_all_pos_found(); + } +} + +TEST_CASE("extended parser generated (uint)") +{ + std::cout << "extended parser generated (uint) "; + nlohmann::json json; + auto& array = json["uints"]; + for (std::uint64_t i = 0; i < 512; ++i) + { + array.emplace_back(i); + } + //check area around key points + const auto add_area = [&](std::uint64_t mid, std::uint64_t lower, std::uint64_t higher) + { + for (std::uint64_t i = mid - lower; i < mid + higher; ++i) + { + array.emplace_back(i); + } + array.emplace_back(mid + higher); + }; + add_area(std::numeric_limits::max() / 2, 32, 32); + add_area(std::numeric_limits::max() / 2, 32, 32); + add_area(std::numeric_limits::max(), 32, 32); + + add_area(std::numeric_limits::max() / 2, 32, 32); + add_area(std::numeric_limits::max() / 2, 32, 32); + add_area(std::numeric_limits::max(), 32, 32); + + add_area(std::numeric_limits::max() / 2, 32, 32); + add_area(std::numeric_limits::max() / 2, 32, 32); + add_area(std::numeric_limits::max(), 32, 0); + test_json(json); +} +TEST_CASE("extended parser generated (int)") +{ + std::cout << "extended parser generated (int) "; + nlohmann::json json; + auto& array = json["ints"]; + for (std::int64_t i = -512; i <= -1; ++i) + { + array.emplace_back(i); + } + //check area around key points + const auto add_area = [&](std::int64_t mid, std::int64_t lower, std::int64_t higher) + { + for (std::int64_t i = mid - lower; i <= mid + higher; ++i) + { + array.emplace_back(i); 
+ } + }; + add_area(std::numeric_limits::min(), 32, 32); + add_area(std::numeric_limits::min(), 32, 32); + add_area(std::numeric_limits::min(), 32, 32); + add_area(std::numeric_limits::min(), 0, 32); + test_json(json); +} +TEST_CASE("extended parser generated (array / bool)") +{ + std::cout << "extended parser generated (array / bool) "; + nlohmann::json json; + auto& array = json["arrays"]; + array = nlohmann::json::array(); + for (std::uint64_t i = 0; i < 512; ++i) + { + auto sub = nlohmann::json::array(); + for (std::uint64_t j = 0; j < i; ++j) + { + sub.emplace_back((j % 2 == 0)); + } + array.emplace_back(std::move(sub)); + } + //add large array + auto sub = nlohmann::json::array(); + for (std::uint64_t j = 0; j < std::numeric_limits::max() + 1; ++j) + { + sub.emplace_back((j % 2 == 0)); + } + array.emplace_back(std::move(sub)); + test_json(json); +} +TEST_CASE("extended parser generated (object / null)") +{ + std::cout << "extended parser generated (object / null) "; + nlohmann::json json; + auto& array = json["objects"]; + array = nlohmann::json::array(); + for (std::uint64_t i = 0; i < 512; ++i) + { + auto sub = nlohmann::json::object(); + for (std::uint64_t j = 0; j < i; ++j) + { + sub[std::string(static_cast(j), 'k')]; + + } + array.emplace_back(std::move(sub)); + } + //add object with long key + auto sub = nlohmann::json::object(); + sub[std::string(std::numeric_limits::max() + 1, 'k')]; + array.emplace_back(std::move(sub)); + test_json(json); +} +TEST_CASE("extended parser generated (string)") +{ + std::cout << "extended parser generated (string) "; + nlohmann::json json; + auto& array = json["strings"]; + array = nlohmann::json::array(); + for (std::uint64_t i = 0; i < 512; ++i) + { + array.emplace_back(std::string(static_cast(i), '|')); + } + array.emplace_back(std::string(std::numeric_limits::max() + 1, '|')); + //tests with large strings (e.g. 
requiring uint64 as size type) are not done + test_json(json); +} +TEST_CASE("extended parser generated (binary)") +{ + std::cout << "extended parser generated (binary) "; + nlohmann::json json; + auto& array = json["binary"]; + array = nlohmann::json::array(); + for (std::uint64_t i = 0; i < 512; ++i) + { + array.emplace_back(nlohmann::json::binary(std::vector(static_cast(i), 255))); + } + //add large binary + std::vector data(std::numeric_limits::max() + 1, 255); + array.emplace_back(nlohmann::json::binary(std::move(data))); + test_json(json); +} diff --git a/tests/src/unit-sax-parser-store-source-location.cpp b/tests/src/unit-sax-parser-store-source-location.cpp new file mode 100644 index 0000000000..deff9ce9ee --- /dev/null +++ b/tests/src/unit-sax-parser-store-source-location.cpp @@ -0,0 +1,333 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.10.2 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2019 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include +#include + +#include "doctest_compatibility.h" + +#include + +//prototype to make -Wmissing-prototypes happy +std::ostream& operator<<(std::ostream& out, const nlohmann::detail::position_t& p); + +//test json parser with detailed line / col information as metadata + +struct token_start_stop +{ + nlohmann::detail::position_t start{}; + nlohmann::detail::position_t stop{}; +}; + +std::ostream& operator<<(std::ostream& out, const nlohmann::detail::position_t& p) +{ + out << p.chars_read_total << '(' << p.lines_read << ':' << p.chars_read_current_line << ')'; + return out; +} + +using json_with_token_start_stop = + nlohmann::basic_json < + std::map, + std::vector, + std::string, + bool, + std::int64_t, + std::uint64_t, + double, + std::allocator, + nlohmann::adl_serializer, + std::vector, + token_start_stop >; + +//adapted from detail::json_sax_dom_parser +class sax_with_token_start_stop_metadata +{ + public: + using json = json_with_token_start_stop; + using number_integer_t = typename json::number_integer_t; + using number_unsigned_t = typename json::number_unsigned_t; + using number_float_t = typename json::number_float_t; + using string_t = typename json::string_t; + using binary_t = typename json::binary_t; + + /*! 
+ @param[in,out] r reference to a JSON value that is manipulated while + parsing + @param[in] allow_exceptions_ whether parse errors yield exceptions + */ + explicit sax_with_token_start_stop_metadata(json& r, const bool allow_exceptions_ = true) + : root(r) + , allow_exceptions(allow_exceptions_) + {} + + template + void next_token_start(const nlohmann::detail::lexer& lex) + { + start_stop.start = lex.get_position(); + } + + template + void next_token_end(const nlohmann::detail::lexer& lex) + { + start_stop.stop = lex.get_position(); + } + + bool null() + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } + + bool string(string_t& val) + { + handle_value(val); + return true; + } + + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } + + bool start_object(std::size_t len) + { + ref_stack.push_back(handle_value(json::value_t::object)); + ref_stack.back()->start = start_stop.start; + + if (len != static_cast(-1) && len > ref_stack.back()->max_size()) + { + throw nlohmann::detail::out_of_range::create(408, nlohmann::detail::concat("excessive object size: ", std::to_string(len)), ref_stack.back()); + } + + return true; + } + + bool key(string_t& val) + { + assert(!ref_stack.empty()); + assert(ref_stack.back()->is_object()); + + // add null at given key and store the reference for later + object_element = &(*ref_stack.back())[val]; + return true; + } + + bool end_object() + { + assert(!ref_stack.empty()); + assert(ref_stack.back()->is_object()); + + ref_stack.back()->stop = start_stop.stop; + ref_stack.pop_back(); + return true; + } + + bool start_array(std::size_t len) + { + 
ref_stack.push_back(handle_value(json::value_t::array)); + ref_stack.back()->start = start_stop.start; + + if (len != static_cast(-1) && len > ref_stack.back()->max_size()) + { + throw nlohmann::detail::out_of_range::create(408, nlohmann::detail::concat("excessive array size: ", std::to_string(len)), ref_stack.back()); + } + + return true; + } + + bool end_array() + { + assert(!ref_stack.empty()); + assert(ref_stack.back()->is_array()); + + ref_stack.back()->stop = start_stop.stop; + ref_stack.pop_back(); + return true; + } + + template + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const Exception& ex) + { + errored = true; + static_cast(ex); + if (allow_exceptions) + { + throw ex; + } + return false; + } + + constexpr bool is_errored() const + { + return errored; + } + + private: + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + json* + handle_value(Value&& v) + { + if (ref_stack.empty()) + { + root = json(std::forward(v)); + root.start = start_stop.start; + root.stop = start_stop.stop; + return &root; + } + + assert(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) + { + auto& array_element = ref_stack.back()->emplace_back(std::forward(v)); + array_element.start = start_stop.start; + array_element.stop = start_stop.stop; + return &array_element; + } + + assert(ref_stack.back()->is_object()); + assert(object_element); + *object_element = json(std::forward(v)); + object_element->start = start_stop.start; + object_element->stop = start_stop.stop; + return object_element; + } + + /// the parsed JSON value + json& root; + /// stack to model hierarchy of values + std::vector ref_stack{}; + /// helper to hold the reference for the next object element + json* object_element = nullptr; + /// whether a syntax error occurred + 
bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// start / stop information for the current token + token_start_stop start_stop{}; +}; + +TEST_CASE("parse-json-with-position-info") +{ + const std::string str = /* indentation inside the raw strings is significant: the column CHECKs below expect 2 spaces before "array" and ']' and 4 spaces before each array element */ + /*line 0*/ R"({)" + "\n" + /*line 1*/ R"(  "array" : [)" + "\n" + /*line 2*/ R"(    14294967296,)" + "\n" + /*line 3*/ R"(    -1,)" + "\n" + /*line 4*/ R"(    true,)" + "\n" + /*line 5*/ R"(    4.2,)" + "\n" + /*line 6*/ R"(    null,)" + "\n" + /*line 7*/ R"(    "str")" + "\n" + /*line 8*/ R"(  ])" + "\n" + /*line 9*/ R"(})"; + json_with_token_start_stop j; + sax_with_token_start_stop_metadata sax{j}; + CHECK(nlohmann::json::sax_parse(str, &sax, nlohmann::json::input_format_t::json)); + CHECK(j.start.lines_read == 0); + CHECK(j.start.chars_read_current_line == 1); + + CHECK(j["array"].start.lines_read == 1); + CHECK(j["array"].start.chars_read_current_line == 13); + + CHECK(j["array"][0].start.lines_read == 2); + CHECK(j["array"][0].start.chars_read_current_line == 5); + CHECK(j["array"][0].stop.lines_read == 2); + CHECK(j["array"][0].stop.chars_read_current_line == 15); + + CHECK(j["array"][1].start.lines_read == 3); + CHECK(j["array"][1].start.chars_read_current_line == 5); + CHECK(j["array"][1].stop.lines_read == 3); + CHECK(j["array"][1].stop.chars_read_current_line == 6); + + CHECK(j["array"][2].start.lines_read == 4); + CHECK(j["array"][2].start.chars_read_current_line == 5); + CHECK(j["array"][2].stop.lines_read == 4); + CHECK(j["array"][2].stop.chars_read_current_line == 8); + + CHECK(j["array"][3].start.lines_read == 5); + CHECK(j["array"][3].start.chars_read_current_line == 5); + CHECK(j["array"][3].stop.lines_read == 5); + CHECK(j["array"][3].stop.chars_read_current_line == 7); + + CHECK(j["array"][4].start.lines_read == 6); //starts directly after last value.... 
+ CHECK(j["array"][4].start.chars_read_current_line == 5); + CHECK(j["array"][4].stop.lines_read == 6); + CHECK(j["array"][4].stop.chars_read_current_line == 8); + + CHECK(j["array"][5].start.lines_read == 7); + CHECK(j["array"][5].start.chars_read_current_line == 5); + CHECK(j["array"][5].stop.lines_read == 7); + CHECK(j["array"][5].stop.chars_read_current_line == 9); + + CHECK(j["array"].stop.lines_read == 8); + CHECK(j["array"].stop.chars_read_current_line == 3); + + CHECK(j.stop.lines_read == 9); + CHECK(j.stop.chars_read_current_line == 1); +}