From 8c4e331acbfec50d0a1b5dc3feebce2fbaaf5f32 Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Mon, 11 Oct 2021 16:06:06 -0400 Subject: [PATCH] Update to upstream simdjson v1.0.0, no further changes. --- simdjson/simdjson.cpp | 933 +++- simdjson/simdjson.h | 11086 ++++++++++++++++++++++++++++++---------- 2 files changed, 9171 insertions(+), 2848 deletions(-) diff --git a/simdjson/simdjson.cpp b/simdjson/simdjson.cpp index ffc002e..85cfb3f 100644 --- a/simdjson/simdjson.cpp +++ b/simdjson/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2021-03-16 17:57:23 -0400. Do not edit! */ +/* auto-generated on 2021-09-07 14:34:40 -0400. Do not edit! */ /* begin file src/simdjson.cpp */ #include "simdjson.h" @@ -9,6 +9,8 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS #include #include #include +#include + namespace simdjson { namespace internal { /*! @@ -866,9 +868,9 @@ inline char *format_buffer(char *buf, int len, int decimal_exponent, std::memset(buf + k, '0', static_cast(n) - static_cast(k)); // Make it look like a floating-point number (#362, #378) - buf[n + 0] = '.'; - buf[n + 1] = '0'; - return buf + (static_cast(n) + 2); + // buf[n + 0] = '.'; + // buf[n + 1] = '0'; + return buf + (static_cast(n)); } if (0 < n && n <= max_exp) { @@ -921,7 +923,8 @@ format. Returns an iterator pointing past-the-end of the decimal representation. */ char *to_chars(char *first, const char *last, double value) { static_cast(last); // maybe unused - fix warning - if (value <= -0) { + bool negative = std::signbit(value); + if (negative) { value = -value; *first++ = '-'; } @@ -930,8 +933,10 @@ char *to_chars(char *first, const char *last, double value) { { *first++ = '0'; // Make it look like a floating-point number (#362, #378) - *first++ = '.'; - *first++ = '0'; + if(negative) { + *first++ = '.'; + *first++ = '0'; + } return first; } // Compute v = buffer * 10^decimal_exponent. @@ -1088,6 +1093,86 @@ decimal parse_decimal(const char *&p) noexcept { return answer; } +// This should always succeed since it follows a call to parse_number. +// Will not read at or beyond the "end" pointer. +decimal parse_decimal(const char *&p, const char * end) noexcept { + decimal answer; + answer.num_digits = 0; + answer.decimal_point = 0; + answer.truncated = false; + if(p == end) { return answer; } // should never happen + answer.negative = (*p == '-'); + if ((*p == '-') || (*p == '+')) { + ++p; + } + + while ((p != end) && (*p == '0')) { + ++p; + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + if ((p != end) && (*p == '.')) { + ++p; + if(p == end) { return answer; } // should never happen + const char *first_after_period = p; + // if we have not yet encountered a zero, we have to skip it as well + if (answer.num_digits == 0) { + // skip zeros + while (*p == '0') { + ++p; + } + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + answer.decimal_point = int32_t(first_after_period - p); + } + if(answer.num_digits > 0) { + const char *preverse = p - 1; + int32_t trailing_zeros = 0; + while ((*preverse == '0') || (*preverse == '.')) { + if(*preverse == '0') { trailing_zeros++; }; + --preverse; + } + answer.decimal_point += int32_t(answer.num_digits); + answer.num_digits -= uint32_t(trailing_zeros); + } + if(answer.num_digits > max_digits ) { + answer.num_digits = max_digits; + answer.truncated = true; + } + if ((p != end) && (('e' == *p) || ('E' == *p))) { + ++p; + if(p == end) { return answer; } // should never happen + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + int32_t exp_number = 0; // exponential part + while ((p != end) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + answer.decimal_point += (neg_exp ? -exp_number : exp_number); + } + return answer; +} + namespace { // remove all final zeroes @@ -1427,6 +1512,12 @@ adjusted_mantissa parse_long_mantissa(const char *first) { return compute_float(d); } +template +adjusted_mantissa parse_long_mantissa(const char *first, const char *end) { + decimal d = parse_decimal(first, end); + return compute_float(d); +} + double from_chars(const char *first) noexcept { bool negative = first[0] == '-'; if (negative) { @@ -1443,6 +1534,23 @@ double from_chars(const char *first) noexcept { return value; } + +double from_chars(const char *first, const char *end) noexcept { + bool negative = first[0] == '-'; + if (negative) { + first++; + } + adjusted_mantissa am = parse_long_mantissa>(first, end); + uint64_t word = am.mantissa; + word |= uint64_t(am.power2) + << binary_format::mantissa_explicit_bits(); + word = negative ? word | (uint64_t(1) << binary_format::sign_index()) + : word; + double value; + std::memcpy(&value, &word, sizeof(double)); + return value; +} + } // internal } // simdjson /* end file src/from_chars.cpp */ @@ -1478,7 +1586,10 @@ namespace internal { { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" }, { PARSER_IN_USE, "Cannot parse a new document while a document is still in use." }, { OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." }, - { INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length." } + { INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, + { INCOMPLETE_ARRAY_OR_OBJECT, "JSON document ended early in the middle of an object or array." }, + { SCALAR_DOCUMENT_AS_VALUE, "A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "}, + { OUT_OF_BOUNDS, "Attempted to access location outside of document."} }; // error_messages[] } // namespace internal @@ -2674,8 +2785,10 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation ) const noexcept { dst.reset( new (std::nothrow) dom_parser_implementation() ); if (!dst) { return MEMALLOC; } - dst->set_capacity(capacity); - dst->set_max_depth(max_depth); + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; return SUCCESS; } @@ -2962,7 +3075,6 @@ using namespace simd; } this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } } // do not forget to call check_eof! @@ -3423,8 +3535,7 @@ class json_minifier { simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); - in.compress(mask, dst); - dst += 64 - count_ones(mask); + dst += in.compress(mask, dst); } simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { @@ -3505,8 +3616,8 @@ namespace { * * Simply put, we iterate over the structural characters, starting from * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ';' ',' - * and when the second element is NOT one of these characters: '}' '}' ';' ','. + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. * * This simple comparison works most of the time, but it does not cover cases * where the batch's structural indexes contain a perfect amount of documents. @@ -3520,7 +3631,8 @@ namespace { * batch. */ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // TODO don't count separately, just figure out depth + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; auto obj_cnt = 0; for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { @@ -3557,6 +3669,25 @@ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementati // Last document is incomplete; mark the document at i + 1 as the next one return i; } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } return 0; } @@ -3587,8 +3718,62 @@ class bit_indexer { // it helps tremendously. if (bits == 0) return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + int cnt = static_cast(count_ones(bits)); // Do the first 8 all together for (int i=0; i<8; i++) { this->tail[i] = idx + trailing_zeroes(bits); @@ -3617,6 +3802,7 @@ class bit_indexer { } this->tail += cnt; +#endif } }; @@ -3630,14 +3816,14 @@ class json_structural_indexer { * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. */ template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept; + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; private: simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); template simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial); + simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -3687,10 +3873,17 @@ simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) // workout. // template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept { +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - if (partial) { len = trim_partial_utf8(buf, len); } - + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } buf_block_reader reader(buf, len); json_structural_indexer indexer(parser.structural_indexes.get()); @@ -3698,12 +3891,11 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa while (reader.has_full_block()) { indexer.step(reader.full_block(), reader); } - - // Take care of the last block (will always be there unless file is empty) + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return EMPTY; } + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); } @@ -3734,14 +3926,13 @@ simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64 len)) { return UNEXPECTED_ERROR; } - if (partial) { + if (partial == stage1_mode::streaming_partial) { // If we have an unclosed string, then the last structural // will be the quote and we want to make sure to omit it. if(have_unclosed_string) { @@ -3785,11 +3980,48 @@ simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_imp // a valid JSON file cannot have zero structural indexes - we should have found something if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } } + // We truncate the input to the end of the last complete document (or zero). auto new_structural_indexes = find_next_document_index(parser); if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - return CAPACITY; // If the buffer is partial but the document is incomplete, it's too big to parse. + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } } + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } } checker.check_eof(); return checker.errors(); @@ -4059,12 +4291,12 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum { auto value = advance(); - // Make sure the outer hash or array is closed before continuing; otherwise, there are ways we + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 if (!STREAMING) { switch (*value) { - case '{': if (last_structural() != '}') { return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { return TAPE_ERROR; }; break; + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; } } @@ -4662,7 +4894,7 @@ simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_ return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); } -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept { +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { this->buf = _buf; this->len = _len; return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); @@ -4681,7 +4913,7 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, false); + auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } return stage2(_doc); } @@ -4710,8 +4942,10 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation ) const noexcept { dst.reset( new (std::nothrow) dom_parser_implementation() ); if (!dst) { return MEMALLOC; } - dst->set_capacity(capacity); - dst->set_max_depth(max_depth); + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; return SUCCESS; } @@ -4746,8 +4980,8 @@ namespace { * * Simply put, we iterate over the structural characters, starting from * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ';' ',' - * and when the second element is NOT one of these characters: '}' '}' ';' ','. + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. * * This simple comparison works most of the time, but it does not cover cases * where the batch's structural indexes contain a perfect amount of documents. @@ -4761,7 +4995,8 @@ namespace { * batch. */ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // TODO don't count separately, just figure out depth + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; auto obj_cnt = 0; for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { @@ -4798,6 +5033,25 @@ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementati // Last document is incomplete; mark the document at i + 1 as the next one return i; } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } return 0; } @@ -4814,7 +5068,7 @@ namespace stage1 { class structural_scanner { public: -simdjson_really_inline structural_scanner(dom_parser_implementation &_parser, bool _partial) +simdjson_really_inline structural_scanner(dom_parser_implementation &_parser, stage1_mode _partial) : buf{_parser.buf}, next_structural_index{_parser.structural_indexes.get()}, parser{_parser}, @@ -4844,7 +5098,7 @@ simdjson_really_inline void validate_utf8_character() { if ((buf[idx] & 0b00100000) == 0) { // missing continuation if (simdjson_unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { - if (idx+1 > len && partial) { idx = len; return; } + if (idx+1 > len && is_streaming(partial)) { idx = len; return; } error = UTF8_ERROR; idx++; return; @@ -4859,7 +5113,7 @@ simdjson_really_inline void validate_utf8_character() { if ((buf[idx] & 0b00010000) == 0) { // missing continuation if (simdjson_unlikely(idx+2 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) { - if (idx+2 > len && partial) { idx = len; return; } + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } error = UTF8_ERROR; idx++; return; @@ -4875,7 +5129,7 @@ simdjson_really_inline void validate_utf8_character() { // 4-byte // missing continuation if (simdjson_unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || !is_continuation(buf[idx+3]))) { - if (idx+2 > len && partial) { idx = len; return; } + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } error = UTF8_ERROR; idx++; return; @@ -4948,24 +5202,56 @@ simdjson_really_inline error_code scan() { break; } } - *next_structural_index = len; // We pad beyond. // https://github.com/simdjson/simdjson/issues/906 + // See json_structural_indexer.h for an explanation. + *next_structural_index = len; // assumed later in partial == stage1_mode::streaming_final next_structural_index[1] = len; next_structural_index[2] = 0; parser.n_structural_indexes = uint32_t(next_structural_index - parser.structural_indexes.get()); if (simdjson_unlikely(parser.n_structural_indexes == 0)) { return EMPTY; } parser.next_structural_index = 0; - if (partial) { + if (partial == stage1_mode::streaming_partial) { if(unclosed_string) { parser.n_structural_indexes--; if (simdjson_unlikely(parser.n_structural_indexes == 0)) { return CAPACITY; } } + // We truncate the input to the end of the last complete document (or zero). auto new_structural_indexes = find_next_document_index(parser); if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - return CAPACITY; // If the buffer is partial but the document is incomplete, it's too big to parse. + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } } parser.n_structural_indexes = new_structural_indexes; + } else if(partial == stage1_mode::streaming_final) { + if(unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (parser.n_structural_indexes == 0) { return EMPTY; } } else if(unclosed_string) { error = UNCLOSED_STRING; } return error; } @@ -4977,13 +5263,13 @@ simdjson_really_inline error_code scan() { uint32_t len; uint32_t idx{0}; error_code error{SUCCESS}; - bool partial; + stage1_mode partial; }; // structural_scanner } // namespace stage1 } // unnamed namespace -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool partial) noexcept { +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode partial) noexcept { this->buf = _buf; this->len = _len; stage1::structural_scanner scanner(*this, partial); @@ -5333,12 +5619,12 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum { auto value = advance(); - // Make sure the outer hash or array is closed before continuing; otherwise, there are ways we + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 if (!STREAMING) { switch (*value) { - case '{': if (last_structural() != '}') { return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { return TAPE_ERROR; }; break; + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; } } @@ -5926,7 +6212,7 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, false); + auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } return stage2(_doc); } @@ -5956,8 +6242,10 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation ) const noexcept { dst.reset( new (std::nothrow) dom_parser_implementation() ); if (!dst) { return MEMALLOC; } - dst->set_capacity(capacity); - dst->set_max_depth(max_depth); + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; return SUCCESS; } @@ -6251,7 +6539,6 @@ using namespace simd; } this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } } // do not forget to call check_eof! @@ -6712,8 +6999,7 @@ class json_minifier { simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); - in.compress(mask, dst); - dst += 64 - count_ones(mask); + dst += in.compress(mask, dst); } simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { @@ -6794,8 +7080,8 @@ namespace { * * Simply put, we iterate over the structural characters, starting from * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ';' ',' - * and when the second element is NOT one of these characters: '}' '}' ';' ','. + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. * * This simple comparison works most of the time, but it does not cover cases * where the batch's structural indexes contain a perfect amount of documents. @@ -6809,7 +7095,8 @@ namespace { * batch. */ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // TODO don't count separately, just figure out depth + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; auto obj_cnt = 0; for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { @@ -6846,6 +7133,25 @@ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementati // Last document is incomplete; mark the document at i + 1 as the next one return i; } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } return 0; } @@ -6876,8 +7182,62 @@ class bit_indexer { // it helps tremendously. if (bits == 0) return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + int cnt = static_cast(count_ones(bits)); // Do the first 8 all together for (int i=0; i<8; i++) { this->tail[i] = idx + trailing_zeroes(bits); @@ -6906,6 +7266,7 @@ class bit_indexer { } this->tail += cnt; +#endif } }; @@ -6919,14 +7280,14 @@ class json_structural_indexer { * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. */ template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept; + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; private: simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); template simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial); + simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -6976,10 +7337,17 @@ simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) // workout. // template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept { +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - if (partial) { len = trim_partial_utf8(buf, len); } - + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } buf_block_reader reader(buf, len); json_structural_indexer indexer(parser.structural_indexes.get()); @@ -6987,12 +7355,11 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa while (reader.has_full_block()) { indexer.step(reader.full_block(), reader); } - - // Take care of the last block (will always be there unless file is empty) + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return EMPTY; } + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); } @@ -7023,14 +7390,13 @@ simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64 len)) { return UNEXPECTED_ERROR; } - if (partial) { + if (partial == stage1_mode::streaming_partial) { // If we have an unclosed string, then the last structural // will be the quote and we want to make sure to omit it. if(have_unclosed_string) { @@ -7074,11 +7444,48 @@ simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_imp // a valid JSON file cannot have zero structural indexes - we should have found something if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } } + // We truncate the input to the end of the last complete document (or zero). auto new_structural_indexes = find_next_document_index(parser); if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - return CAPACITY; // If the buffer is partial but the document is incomplete, it's too big to parse. + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } } + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } } checker.check_eof(); return checker.errors(); @@ -7347,12 +7754,12 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum { auto value = advance(); - // Make sure the outer hash or array is closed before continuing; otherwise, there are ways we + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 if (!STREAMING) { switch (*value) { - case '{': if (last_structural() != '}') { return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { return TAPE_ERROR; }; break; + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; } } @@ -7948,7 +8355,7 @@ simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_ return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); } -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept { +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { this->buf = _buf; this->len = _len; return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); @@ -7967,7 +8374,7 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, false); + auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } return stage2(_doc); } @@ -7997,8 +8404,10 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation ) const noexcept { dst.reset( new (std::nothrow) dom_parser_implementation() ); if (!dst) { return MEMALLOC; } - dst->set_capacity(capacity); - dst->set_max_depth(max_depth); + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; return SUCCESS; } @@ -8256,7 +8665,6 @@ using namespace simd; } this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } } // do not forget to call check_eof! @@ -8717,8 +9125,7 @@ class json_minifier { simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); - in.compress(mask, dst); - dst += 64 - count_ones(mask); + dst += in.compress(mask, dst); } simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { @@ -8799,8 +9206,8 @@ namespace { * * Simply put, we iterate over the structural characters, starting from * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ';' ',' - * and when the second element is NOT one of these characters: '}' '}' ';' ','. + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. * * This simple comparison works most of the time, but it does not cover cases * where the batch's structural indexes contain a perfect amount of documents. @@ -8814,7 +9221,8 @@ namespace { * batch. */ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // TODO don't count separately, just figure out depth + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; auto obj_cnt = 0; for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { @@ -8851,6 +9259,25 @@ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementati // Last document is incomplete; mark the document at i + 1 as the next one return i; } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } return 0; } @@ -8881,8 +9308,62 @@ class bit_indexer { // it helps tremendously. if (bits == 0) return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + int cnt = static_cast(count_ones(bits)); // Do the first 8 all together for (int i=0; i<8; i++) { this->tail[i] = idx + trailing_zeroes(bits); @@ -8911,6 +9392,7 @@ class bit_indexer { } this->tail += cnt; +#endif } }; @@ -8924,14 +9406,14 @@ class json_structural_indexer { * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. */ template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept; + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; private: simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); template simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial); + simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -8981,10 +9463,17 @@ simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) // workout. // template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept { +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - if (partial) { len = trim_partial_utf8(buf, len); } - + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } buf_block_reader reader(buf, len); json_structural_indexer indexer(parser.structural_indexes.get()); @@ -8992,12 +9481,11 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa while (reader.has_full_block()) { indexer.step(reader.full_block(), reader); } - - // Take care of the last block (will always be there unless file is empty) + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return EMPTY; } + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); } @@ -9028,14 +9516,13 @@ simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64 len)) { return UNEXPECTED_ERROR; } - if (partial) { + if (partial == stage1_mode::streaming_partial) { // If we have an unclosed string, then the last structural // will be the quote and we want to make sure to omit it. if(have_unclosed_string) { @@ -9079,11 +9570,48 @@ simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_imp // a valid JSON file cannot have zero structural indexes - we should have found something if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } } + // We truncate the input to the end of the last complete document (or zero). auto new_structural_indexes = find_next_document_index(parser); if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - return CAPACITY; // If the buffer is partial but the document is incomplete, it's too big to parse. + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } } + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } } checker.check_eof(); return checker.errors(); @@ -9353,12 +9881,12 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum { auto value = advance(); - // Make sure the outer hash or array is closed before continuing; otherwise, there are ways we + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 if (!STREAMING) { switch (*value) { - case '{': if (last_structural() != '}') { return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { return TAPE_ERROR; }; break; + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; } } @@ -9956,7 +10484,7 @@ simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_ return ppc64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); } -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept { +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { this->buf = _buf; this->len = _len; return ppc64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); @@ -9975,7 +10503,7 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, false); + auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } return stage2(_doc); } @@ -10005,8 +10533,10 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation ) const noexcept { dst.reset( new (std::nothrow) dom_parser_implementation() ); if (!dst) { return MEMALLOC; } - dst->set_capacity(capacity); - dst->set_max_depth(max_depth); + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; return SUCCESS; } @@ -10297,7 +10827,6 @@ using namespace simd; } this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } } // do not forget to call check_eof! @@ -10758,8 +11287,7 @@ class json_minifier { simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); - in.compress(mask, dst); - dst += 64 - count_ones(mask); + dst += in.compress(mask, dst); } simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { @@ -10840,8 +11368,8 @@ namespace { * * Simply put, we iterate over the structural characters, starting from * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ';' ',' - * and when the second element is NOT one of these characters: '}' '}' ';' ','. + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. * * This simple comparison works most of the time, but it does not cover cases * where the batch's structural indexes contain a perfect amount of documents. @@ -10855,7 +11383,8 @@ namespace { * batch. */ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // TODO don't count separately, just figure out depth + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; auto obj_cnt = 0; for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { @@ -10892,6 +11421,25 @@ simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementati // Last document is incomplete; mark the document at i + 1 as the next one return i; } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } return 0; } @@ -10922,8 +11470,62 @@ class bit_indexer { // it helps tremendously. if (bits == 0) return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + int cnt = static_cast(count_ones(bits)); // Do the first 8 all together for (int i=0; i<8; i++) { this->tail[i] = idx + trailing_zeroes(bits); @@ -10952,6 +11554,7 @@ class bit_indexer { } this->tail += cnt; +#endif } }; @@ -10965,14 +11568,14 @@ class json_structural_indexer { * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. */ template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept; + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; private: simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); template simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, bool partial); + simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -11022,10 +11625,17 @@ simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) // workout. // template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, bool partial) noexcept { +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - if (partial) { len = trim_partial_utf8(buf, len); } - + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } buf_block_reader reader(buf, len); json_structural_indexer indexer(parser.structural_indexes.get()); @@ -11033,12 +11643,11 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa while (reader.has_full_block()) { indexer.step(reader.full_block(), reader); } - - // Take care of the last block (will always be there unless file is empty) + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return EMPTY; } + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); } @@ -11069,14 +11678,13 @@ simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64 len)) { return UNEXPECTED_ERROR; } - if (partial) { + if (partial == stage1_mode::streaming_partial) { // If we have an unclosed string, then the last structural // will be the quote and we want to make sure to omit it. if(have_unclosed_string) { @@ -11120,11 +11732,48 @@ simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_imp // a valid JSON file cannot have zero structural indexes - we should have found something if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } } + // We truncate the input to the end of the last complete document (or zero). auto new_structural_indexes = find_next_document_index(parser); if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - return CAPACITY; // If the buffer is partial but the document is incomplete, it's too big to parse. + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } } + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } } checker.check_eof(); return checker.errors(); @@ -11393,12 +12042,12 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum { auto value = advance(); - // Make sure the outer hash or array is closed before continuing; otherwise, there are ways we + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 if (!STREAMING) { switch (*value) { - case '{': if (last_structural() != '}') { return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { return TAPE_ERROR; }; break; + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; } } @@ -11995,7 +12644,7 @@ simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_ return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); } -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, bool streaming) noexcept { +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { this->buf = _buf; this->len = _len; return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming); @@ -12014,7 +12663,7 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, false); + auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } return stage2(_doc); } diff --git a/simdjson/simdjson.h b/simdjson/simdjson.h index fb34189..df1f97b 100644 --- a/simdjson/simdjson.h +++ b/simdjson/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2021-03-16 17:57:23 -0400. Do not edit! */ +/* auto-generated on 2021-09-07 14:34:40 -0400. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H @@ -6,7 +6,7 @@ /** * @mainpage * - * Check the [README.md](https://github.com/lemire/simdjson/blob/master/README.md#simdjson--parsing-gigabytes-of-json-per-second). + * Check the [README.md](https://github.com/simdjson/simdjson/blob/master/README.md#simdjson--parsing-gigabytes-of-json-per-second). * * Sample code. See https://github.com/simdjson/simdjson/blob/master/doc/basics.md for more examples. @@ -283,6 +283,8 @@ char *to_chars(char *first, const char *last, double value); * Defined in src/from_chars */ double from_chars(const char *first) noexcept; +double from_chars(const char *first, const char* end) noexcept; + } #ifndef SIMDJSON_EXCEPTIONS @@ -302,7 +304,7 @@ constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; * the input buf should be readable up to buf + SIMDJSON_PADDING * this is a stopgap; there should be a better description of the * main loop and its behavior that abstracts over this - * See https://github.com/lemire/simdjson/issues/174 + * See https://github.com/simdjson/simdjson/issues/174 */ constexpr size_t SIMDJSON_PADDING = 32; @@ -425,7 +427,7 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024; * the regular visual studio or clang under visual * studio, you still need to handle these issues. * - * Non-Windows sytems do not have this complexity. + * Non-Windows systems do not have this complexity. */ #if SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY // We set SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL under Windows. @@ -479,7 +481,7 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024; // now it is safe to trigger the include #include // though the file is there, it does not follow that we got the implementation #if defined(_LIBCPP_STRING_VIEW) -// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceeded C++17, +// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceded C++17, // included string_view. // This means that we have string_view *even though* we may not have C++17. #define SIMDJSON_HAS_STRING_VIEW @@ -498,7 +500,7 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024; #ifndef SIMDJSON_HAS_STRING_VIEW SIMDJSON_PUSH_DISABLE_ALL_WARNINGS /* begin file include/simdjson/nonstd/string_view.hpp */ -// Copyright 2017-2019 by Martin Moene +// Copyright 2017-2020 by Martin Moene // // string-view lite, a C++17-like string_view for C++98 and later. // For more information see https://github.com/martinmoene/string-view-lite @@ -512,7 +514,7 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS #define NONSTD_SV_LITE_H_INCLUDED #define string_view_lite_MAJOR 1 -#define string_view_lite_MINOR 4 +#define string_view_lite_MINOR 6 #define string_view_lite_PATCH 0 #define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) @@ -526,12 +528,22 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS #define nssv_STRING_VIEW_NONSTD 1 #define nssv_STRING_VIEW_STD 2 -#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) -# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) +// tweak header support: + +#ifdef __has_include +# if __has_include() +# include +# endif +#define nssv_HAVE_TWEAK_HEADER 1 +#else +#define nssv_HAVE_TWEAK_HEADER 0 +//# pragma message("string_view.hpp: Note: Tweak header not supported.") #endif -#if defined( nssv_CONFIG_SELECT_STD_STRING_VIEW ) || defined( nssv_CONFIG_SELECT_NONSTD_STRING_VIEW ) -# error nssv_CONFIG_SELECT_STD_STRING_VIEW and nssv_CONFIG_SELECT_NONSTD_STRING_VIEW are deprecated and removed, please use nssv_CONFIG_SELECT_STRING_VIEW=nssv_STRING_VIEW_... +// string_view selection and configuration: + +#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) +# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) #endif #ifndef nssv_CONFIG_STD_SV_OPERATOR @@ -555,10 +567,17 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS # define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 #endif +#ifndef nssv_CONFIG_NO_STREAM_INSERTION +# define nssv_CONFIG_NO_STREAM_INSERTION 0 +#endif + // Control presence of exception handling (try and auto discover): #ifndef nssv_CONFIG_NO_EXCEPTIONS -# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND) +# if _MSC_VER +# include // for _HAS_EXCEPTIONS +# endif +# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS) # define nssv_CONFIG_NO_EXCEPTIONS 0 # else # define nssv_CONFIG_NO_EXCEPTIONS 1 @@ -721,16 +740,21 @@ using std::operator<<; #define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) -#if defined(__clang__) -# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +#if defined( __apple_build_version__ ) +# define nssv_COMPILER_APPLECLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +# define nssv_COMPILER_CLANG_VERSION 0 +#elif defined( __clang__ ) +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) #else -# define nssv_COMPILER_CLANG_VERSION 0 +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION 0 #endif #if defined(__GNUC__) && !defined(__clang__) # define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) #else -# define nssv_COMPILER_GNUC_VERSION 0 +# define nssv_COMPILER_GNUC_VERSION 0 #endif // half-open range [lo..hi): @@ -792,6 +816,45 @@ using std::operator<<; #define nssv_HAVE_STD_HASH nssv_CPP11_120 +// Presence of compiler intrinsics: + +// Providing char-type specializations for compare() and length() that +// use compiler intrinsics can improve compile- and run-time performance. +// +// The challenge is in using the right combinations of builtin availability +// and its constexpr-ness. +// +// | compiler | __builtin_memcmp (constexpr) | memcmp (constexpr) | +// |----------|------------------------------|---------------------| +// | clang | 4.0 (>= 4.0 ) | any (? ) | +// | clang-a | 9.0 (>= 9.0 ) | any (? ) | +// | gcc | any (constexpr) | any (? ) | +// | msvc | >= 14.2 C++17 (>= 14.2 ) | any (? ) | + +#define nssv_HAVE_BUILTIN_VER ( (nssv_CPP17_000 && nssv_COMPILER_MSVC_VERSION >= 142) || nssv_COMPILER_GNUC_VERSION > 0 || nssv_COMPILER_CLANG_VERSION >= 400 || nssv_COMPILER_APPLECLANG_VERSION >= 900 ) +#define nssv_HAVE_BUILTIN_CE ( nssv_HAVE_BUILTIN_VER ) + +#define nssv_HAVE_BUILTIN_MEMCMP ( (nssv_HAVE_CONSTEXPR_14 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_14 ) +#define nssv_HAVE_BUILTIN_STRLEN ( (nssv_HAVE_CONSTEXPR_11 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_11 ) + +#ifdef __has_builtin +# define nssv_HAVE_BUILTIN( x ) __has_builtin( x ) +#else +# define nssv_HAVE_BUILTIN( x ) 0 +#endif + +#if nssv_HAVE_BUILTIN(__builtin_memcmp) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_MEMCMP __builtin_memcmp +#else +# define nssv_BUILTIN_MEMCMP memcmp +#endif + +#if nssv_HAVE_BUILTIN(__builtin_strlen) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_STRLEN __builtin_strlen +#else +# define nssv_BUILTIN_STRLEN strlen +#endif + // C++ feature usage: #if nssv_HAVE_CONSTEXPR_11 @@ -850,9 +913,12 @@ using std::operator<<; #include #include #include -#include #include // std::char_traits<> +#if ! nssv_CONFIG_NO_STREAM_INSERTION +# include +#endif + #if ! nssv_CONFIG_NO_EXCEPTIONS # include #endif @@ -905,39 +971,74 @@ nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) namespace nonstd { namespace sv_lite { -#if nssv_CPP11_OR_GREATER - namespace detail { -#if nssv_CPP14_OR_GREATER +// support constexpr comparison in C++14; +// for C++17 and later, use provided traits: template< typename CharT > -inline constexpr std::size_t length( CharT * s, std::size_t result = 0 ) +inline nssv_constexpr14 int compare( CharT const * s1, CharT const * s2, std::size_t count ) { - CharT * v = s; - std::size_t r = result; - while ( *v != '\0' ) { - ++v; - ++r; + while ( count-- != 0 ) + { + if ( *s1 < *s2 ) return -1; + if ( *s1 > *s2 ) return +1; + ++s1; ++s2; } - return r; + return 0; +} + +#if nssv_HAVE_BUILTIN_MEMCMP + +// specialization of compare() for char, see also generic compare() above: + +inline nssv_constexpr14 int compare( char const * s1, char const * s2, std::size_t count ) +{ + return nssv_BUILTIN_MEMCMP( s1, s2, count ); +} + +#endif + +#if nssv_HAVE_BUILTIN_STRLEN + +// specialization of length() for char, see also generic length() further below: + +inline nssv_constexpr std::size_t length( char const * s ) +{ + return nssv_BUILTIN_STRLEN( s ); } -#else // nssv_CPP14_OR_GREATER +#endif + +#if defined(__OPTIMIZE__) +// gcc, clang provide __OPTIMIZE__ // Expect tail call optimization to make length() non-recursive: template< typename CharT > -inline constexpr std::size_t length( CharT * s, std::size_t result = 0 ) +inline nssv_constexpr std::size_t length( CharT * s, std::size_t result = 0 ) { return *s == '\0' ? result : length( s + 1, result + 1 ); } -#endif // nssv_CPP14_OR_GREATER +#else // OPTIMIZE -} // namespace detail +// non-recursive: -#endif // nssv_CPP11_OR_GREATER +template< typename CharT > +inline nssv_constexpr14 std::size_t length( CharT * s ) +{ + std::size_t result = 0; + while ( *s++ != '\0' ) + { + ++result; + } + return result; +} + +#endif // OPTIMIZE + +} // namespace detail template < @@ -1089,9 +1190,9 @@ class basic_string_view nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept { - using std::swap; - swap( data_, other.data_ ); - swap( size_, other.size_ ); + const basic_string_view tmp(other); + other = *this; + *this = tmp; } // 24.4.2.6 String operations: @@ -1130,7 +1231,11 @@ class basic_string_view nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) { +#if nssv_CPP17_OR_GREATER if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#else + if ( const int result = detail::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#endif { return result; } @@ -1374,7 +1479,7 @@ class basic_string_view { const basic_string_view v; - nssv_constexpr explicit not_in_view( basic_string_view v ) : v( v ) {} + nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} nssv_constexpr bool operator()( CharT c ) const { @@ -1464,37 +1569,37 @@ template< class CharT, class Traits > nssv_constexpr bool operator== ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) == 0 ; } +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } template< class CharT, class Traits > nssv_constexpr bool operator!= ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) != 0 ; } +{ return !( lhs == rhs ); } template< class CharT, class Traits > nssv_constexpr bool operator< ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0 ; } +{ return lhs.compare( rhs ) < 0; } template< class CharT, class Traits > nssv_constexpr bool operator<= ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0 ; } +{ return lhs.compare( rhs ) <= 0; } template< class CharT, class Traits > nssv_constexpr bool operator> ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0 ; } +{ return lhs.compare( rhs ) > 0; } template< class CharT, class Traits > nssv_constexpr bool operator>= ( basic_string_view lhs, basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0 ; } +{ return lhs.compare( rhs ) >= 0; } // Let S be basic_string_view, and sv be an instance of S. // Implementations shall provide sufficient additional overloads marked @@ -1503,21 +1608,21 @@ nssv_constexpr bool operator>= ( #if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) -// accomodate for older compilers: +// accommodate for older compilers: // == template< class CharT, class Traits> nssv_constexpr bool operator==( basic_string_view lhs, - char const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) == 0; } + CharT const * rhs ) nssv_noexcept +{ return lhs.size() == detail::length( rhs ) && lhs.compare( rhs ) == 0; } template< class CharT, class Traits> nssv_constexpr bool operator==( - char const * lhs, + CharT const * lhs, basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) == 0; } +{ return detail::length( lhs ) == rhs.size() && rhs.compare( lhs ) == 0; } template< class CharT, class Traits> nssv_constexpr bool operator==( @@ -1536,38 +1641,38 @@ nssv_constexpr bool operator==( template< class CharT, class Traits> nssv_constexpr bool operator!=( basic_string_view lhs, - char const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) != 0; } + CharT const * rhs ) nssv_noexcept +{ return !( lhs == rhs ); } template< class CharT, class Traits> nssv_constexpr bool operator!=( - char const * lhs, + CharT const * lhs, basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) != 0; } +{ return !( lhs == rhs ); } template< class CharT, class Traits> nssv_constexpr bool operator!=( basic_string_view lhs, std::basic_string rhs ) nssv_noexcept -{ return lhs.size() != rhs.size() && lhs.compare( rhs ) != 0; } +{ return !( lhs == rhs ); } template< class CharT, class Traits> nssv_constexpr bool operator!=( std::basic_string rhs, basic_string_view lhs ) nssv_noexcept -{ return lhs.size() != rhs.size() || rhs.compare( lhs ) != 0; } +{ return !( lhs == rhs ); } // < template< class CharT, class Traits> nssv_constexpr bool operator<( basic_string_view lhs, - char const * rhs ) nssv_noexcept + CharT const * rhs ) nssv_noexcept { return lhs.compare( rhs ) < 0; } template< class CharT, class Traits> nssv_constexpr bool operator<( - char const * lhs, + CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) > 0; } @@ -1588,12 +1693,12 @@ nssv_constexpr bool operator<( template< class CharT, class Traits> nssv_constexpr bool operator<=( basic_string_view lhs, - char const * rhs ) nssv_noexcept + CharT const * rhs ) nssv_noexcept { return lhs.compare( rhs ) <= 0; } template< class CharT, class Traits> nssv_constexpr bool operator<=( - char const * lhs, + CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) >= 0; } @@ -1614,12 +1719,12 @@ nssv_constexpr bool operator<=( template< class CharT, class Traits> nssv_constexpr bool operator>( basic_string_view lhs, - char const * rhs ) nssv_noexcept + CharT const * rhs ) nssv_noexcept { return lhs.compare( rhs ) > 0; } template< class CharT, class Traits> nssv_constexpr bool operator>( - char const * lhs, + CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) < 0; } @@ -1640,12 +1745,12 @@ nssv_constexpr bool operator>( template< class CharT, class Traits> nssv_constexpr bool operator>=( basic_string_view lhs, - char const * rhs ) nssv_noexcept + CharT const * rhs ) nssv_noexcept { return lhs.compare( rhs ) >= 0; } template< class CharT, class Traits> nssv_constexpr bool operator>=( - char const * lhs, + CharT const * lhs, basic_string_view rhs ) nssv_noexcept { return rhs.compare( lhs ) <= 0; } @@ -1665,7 +1770,7 @@ nssv_constexpr bool operator>=( #define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type -#if nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 140, 150 ) +#if defined(_MSC_VER) // issue 40 # define nssv_MSVC_ORDER(x) , int=x #else # define nssv_MSVC_ORDER(x) /*, int=x*/ @@ -1677,7 +1782,7 @@ template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator==( basic_string_view lhs, nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) == 0; } +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator==( @@ -1691,13 +1796,13 @@ template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator!= ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.size() != rhs.size() || lhs.compare( rhs ) != 0 ; } +{ return !( lhs == rhs ); } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator!= ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) != 0 ; } +{ return !( lhs == rhs ); } // < @@ -1705,13 +1810,13 @@ template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator< ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0 ; } +{ return lhs.compare( rhs ) < 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator< ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0 ; } +{ return lhs.compare( rhs ) < 0; } // <= @@ -1719,13 +1824,13 @@ template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator<= ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0 ; } +{ return lhs.compare( rhs ) <= 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator<= ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0 ; } +{ return lhs.compare( rhs ) <= 0; } // > @@ -1733,13 +1838,13 @@ template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator> ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0 ; } +{ return lhs.compare( rhs ) > 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator> ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0 ; } +{ return lhs.compare( rhs ) > 0; } // >= @@ -1747,13 +1852,13 @@ template< class CharT, class Traits nssv_MSVC_ORDER(1) > nssv_constexpr bool operator>= ( basic_string_view < CharT, Traits > lhs, nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0 ; } +{ return lhs.compare( rhs ) >= 0; } template< class CharT, class Traits nssv_MSVC_ORDER(2) > nssv_constexpr bool operator>= ( nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0 ; } +{ return lhs.compare( rhs ) >= 0; } #undef nssv_MSVC_ORDER #undef nssv_BASIC_STRING_VIEW_I @@ -1762,6 +1867,8 @@ nssv_constexpr bool operator>= ( // 24.4.4 Inserters and extractors: +#if ! nssv_CONFIG_NO_STREAM_INSERTION + namespace detail { template< class Stream > @@ -1811,6 +1918,8 @@ operator<<( return detail::write_to_stream( os, sv ); } +#endif // nssv_CONFIG_NO_STREAM_INSERTION + // Several typedefs for common character types are provided: typedef basic_string_view string_view; @@ -1959,7 +2068,9 @@ using sv_lite::operator<=; using sv_lite::operator>; using sv_lite::operator>=; +#if ! nssv_CONFIG_NO_STREAM_INSERTION using sv_lite::operator<<; +#endif #if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS using sv_lite::to_string; @@ -2045,6 +2156,25 @@ namespace std { #endif #endif +// The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" +// feature. + +#if SIMDJSON_CPLUSPLUS17 +// if we have C++, then fallthrough is a default attribute +# define simdjson_fallthrough [[fallthrough]] +// check if we have __attribute__ support +#elif defined(__has_attribute) +// check if we have the __fallthrough__ attribute +#if __has_attribute(__fallthrough__) +// we are good to go: +# define simdjson_fallthrough __attribute__((__fallthrough__)) +#endif // __has_attribute(__fallthrough__) +#endif // SIMDJSON_CPLUSPLUS17 +// on some systems, we simply do not have support for fallthrough, so use a default: +#ifndef simdjson_fallthrough +# define simdjson_fallthrough do {} while (0) /* fallthrough */ +#endif // simdjson_fallthrough + #endif // SIMDJSON_COMMON_DEFS_H /* end file include/simdjson/common_defs.h */ @@ -2059,18 +2189,18 @@ SIMDJSON_DISABLE_UNDESIRED_WARNINGS #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION 0.9.0 +#define SIMDJSON_VERSION 1.0.0 namespace simdjson { enum { /** * The major version (MAJOR.minor.revision) of simdjson being used. */ - SIMDJSON_VERSION_MAJOR = 0, + SIMDJSON_VERSION_MAJOR = 1, /** * The minor version (major.MINOR.revision) of simdjson being used. */ - SIMDJSON_VERSION_MINOR = 9, + SIMDJSON_VERSION_MINOR = 0, /** * The revision (major.minor.REVISION) of simdjson being used. */ @@ -2092,33 +2222,36 @@ namespace simdjson { * All possible errors returned by simdjson. */ enum error_code { - SUCCESS = 0, ///< No error - CAPACITY, ///< This parser can't support a document that big - MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error - DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation - STRING_ERROR, ///< Problem while parsing a string - T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' - F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' - N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' - NUMBER_ERROR, ///< Problem while parsing a number - UTF8_ERROR, ///< the input is not valid UTF-8 - UNINITIALIZED, ///< unknown error, or uninitialized document - EMPTY, ///< no structural element found - UNESCAPED_CHARS, ///< found unescaped characters in a string. - UNCLOSED_STRING, ///< missing quote at the end - UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture - INCORRECT_TYPE, ///< JSON element has a different type than user expected - NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits - INDEX_OUT_OF_BOUNDS, ///< JSON array index too large - NO_SUCH_FIELD, ///< JSON field not found in object - IO_ERROR, ///< Error reading a file - INVALID_JSON_POINTER, ///< Invalid JSON pointer reference - INVALID_URI_FRAGMENT, ///< Invalid URI fragment - UNEXPECTED_ERROR, ///< indicative of a bug in simdjson - PARSER_IN_USE, ///< parser is already in use. - OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order - INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + SUCCESS = 0, ///< No error + CAPACITY, ///< This parser can't support a document that big + MEMALLOC, ///< Error allocating memory, most likely out of memory + TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error + DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation + STRING_ERROR, ///< Problem while parsing a string + T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' + F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' + N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' + NUMBER_ERROR, ///< Problem while parsing a number + UTF8_ERROR, ///< the input is not valid UTF-8 + UNINITIALIZED, ///< unknown error, or uninitialized document + EMPTY, ///< no structural element found + UNESCAPED_CHARS, ///< found unescaped characters in a string. + UNCLOSED_STRING, ///< missing quote at the end + UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture + INCORRECT_TYPE, ///< JSON element has a different type than user expected + NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits + INDEX_OUT_OF_BOUNDS, ///< JSON array index too large + NO_SUCH_FIELD, ///< JSON field not found in object + IO_ERROR, ///< Error reading a file + INVALID_JSON_POINTER, ///< Invalid JSON pointer reference + INVALID_URI_FRAGMENT, ///< Invalid URI fragment + UNEXPECTED_ERROR, ///< indicative of a bug in simdjson + PARSER_IN_USE, ///< parser is already in use. + OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order + INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. + OUT_OF_BOUNDS, ///< Attempted to access location outside of document. NUM_ERROR_CODES }; @@ -2262,13 +2395,13 @@ struct simdjson_result_base : protected std::pair { /** * Get the result value. This function is safe if and only - * the error() method returns a value that evoluates to false. + * the error() method returns a value that evaluates to false. */ simdjson_really_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evoluates to false. + * the error() method returns a value that evaluates to false. */ simdjson_really_inline T&& value_unsafe() && noexcept; @@ -2353,13 +2486,13 @@ struct simdjson_result : public internal::simdjson_result_base { /** * Get the result value. This function is safe if and only - * the error() method returns a value that evoluates to false. + * the error() method returns a value that evaluates to false. */ simdjson_really_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evoluates to false. + * the error() method returns a value that evaluates to false. */ simdjson_really_inline T&& value_unsafe() && noexcept; @@ -2368,8 +2501,7 @@ struct simdjson_result : public internal::simdjson_result_base { #if SIMDJSON_EXCEPTIONS template -inline std::ostream& operator<<(std::ostream& out, simdjson_result value) noexcept { return out << value.value(); } - +inline std::ostream& operator<<(std::ostream& out, simdjson_result value) { return out << value.value(); } #endif // SIMDJSON_EXCEPTIONS #ifndef SIMDJSON_DISABLE_DEPRECATED_API @@ -2503,7 +2635,7 @@ struct padded_string final { * * @param path the path to the file. **/ - inline static simdjson_result load(const std::string &path) noexcept; + inline static simdjson_result load(std::string_view path) noexcept; private: padded_string &operator=(const padded_string &o) = delete; @@ -2679,8 +2811,30 @@ namespace dom { class document; } // namespace dom +/** +* This enum is used with the dom_parser_implementation::stage1 function. +* 1) The regular mode expects a fully formed JSON document. +* 2) The streaming_partial mode expects a possibly truncated +* input within a stream on JSON documents. +* 3) The stream_final mode allows us to truncate final +* unterminated strings. It is useful in conjunction with streaming_partial. +*/ +enum class stage1_mode { regular, streaming_partial, streaming_final}; + +/** + * Returns true if mode == streaming_partial or mode == streaming_final + */ +inline bool is_streaming(stage1_mode mode) { + // performance note: it is probably faster to check that mode is different + // from regular than checking that it is either streaming_partial or streaming_final. + return (mode != stage1_mode::regular); + // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final); +} + + namespace internal { + /** * An implementation of simdjson's DOM parser for a particular CPU architecture. * @@ -2719,7 +2873,7 @@ class dom_parser_implementation { * @param streaming Whether this is being called by parser::parse_many. * @return The error code, or SUCCESS if there was no error. */ - simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, bool streaming) noexcept = 0; + simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0; /** * @private For internal implementation use @@ -3596,11 +3750,11 @@ inline padded_string::operator padded_string_view() const noexcept { return padded_string_view(data(), length(), length() + SIMDJSON_PADDING); } -inline simdjson_result padded_string::load(const std::string &filename) noexcept { +inline simdjson_result padded_string::load(std::string_view filename) noexcept { // Open the file SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - std::FILE *fp = std::fopen(filename.c_str(), "rb"); + std::FILE *fp = std::fopen(filename.data(), "rb"); SIMDJSON_POP_DISABLE_WARNINGS if (fp == nullptr) { @@ -4392,7 +4546,7 @@ class parser { * - other json errors if parsing fails. You should not rely on these errors to always the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). */ - inline simdjson_result load_many(const std::string &path, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents. @@ -4486,18 +4640,18 @@ class parser { * - other json errors if parsing fails. You should not rely on these errors to always the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). */ - inline simdjson_result parse_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const uint8_t *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline simdjson_result parse_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const char *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline simdjson_result parse_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const std::string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; inline simdjson_result parse_many(const std::string &&s, size_t batch_size) = delete;// unsafe /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline simdjson_result parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const padded_string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; inline simdjson_result parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe /** @private We do not want to allow implicit conversion from C string to std::string. */ - simdjson_result parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + simdjson_result parse_many(const char *buf, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept = delete; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -4593,7 +4747,7 @@ class parser { /** * @private return an error code corresponding to the last parsing attempt, see - * simdjson.h will return UNITIALIZED if no parsing was attempted + * simdjson.h will return UNINITIALIZED if no parsing was attempted */ [[deprecated("Use the result of parser.parse() instead")]] inline int get_error_code() const noexcept; @@ -4750,7 +4904,29 @@ class document_stream { simdjson_really_inline document_stream &operator=(document_stream &&other) noexcept = default; simdjson_really_inline ~document_stream() noexcept; - + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.parse_many(json,window); + * for(auto doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; /** * An iterator through a forward-only stream of documents. */ @@ -4764,7 +4940,7 @@ class document_stream { using iterator_category = std::input_iterator_tag; /** - * Default contructor. + * Default constructor. */ simdjson_really_inline iterator() noexcept; /** @@ -4908,7 +5084,6 @@ class document_stream { error_code error; size_t batch_start{0}; size_t doc_index{}; - #ifdef SIMDJSON_THREADS_ENABLED /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ bool use_thread; @@ -5320,7 +5495,7 @@ class element { * The key will be matched against **unescaped** JSON: * * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get_uint64().first == 1 + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * @return The value associated with this field, or: @@ -5335,7 +5510,7 @@ class element { * The key will be matched against **unescaped** JSON: * * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get_uint64().first == 1 + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * @return The value associated with this field, or: @@ -5407,7 +5582,7 @@ class element { * The key will be matched against **unescaped** JSON: * * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get_uint64().first == 1 + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * @return The value associated with this field, or: @@ -5630,7 +5805,7 @@ class object { * The key will be matched against **unescaped** JSON: * * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get_uint64().first == 1 + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * This function has linear-time complexity: the keys are checked one by one. @@ -5647,7 +5822,7 @@ class object { * The key will be matched against **unescaped** JSON: * * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get_uint64().first == 1 + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * This function has linear-time complexity: the keys are checked one by one. @@ -5689,7 +5864,7 @@ class object { * The key will be matched against **unescaped** JSON: * * dom::parser parser; - * parser.parse(R"({ "a\n": 1 })"_padded)["a\n"].get_uint64().first == 1 + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD * * This function has linear-time complexity: the keys are checked one by one. @@ -5813,7 +5988,7 @@ class string_builder { inline void append(simdjson::dom::element value); /** Append an array to the builder (to be printed) **/ inline void append(simdjson::dom::array value); - /** Append an objet to the builder (to be printed) **/ + /** Append an object to the builder (to be printed) **/ inline void append(simdjson::dom::object value); /** Reset the builder (so that it would print the empty string) **/ simdjson_really_inline void clear(); @@ -5868,7 +6043,7 @@ class mini_formatter { /** Clears out the content. **/ simdjson_really_inline void clear(); /** - * Get access to the buffer, it is own by the instance, but + * Get access to the buffer, it is owned by the instance, but * the user can make a copy. **/ simdjson_really_inline std::string_view str() const; @@ -5925,7 +6100,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include -#include +#include #include #include #include @@ -6170,7 +6345,7 @@ dom::parser build_parsed_json(const char *buf) noexcept = delete; #define SIMDJSON_INTERNAL_JSONFORMATUTILS_H #include -#include +#include #include namespace simdjson { @@ -6427,7 +6602,7 @@ class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)") // throughout return true if we can do the navigation, false // otherwise - // Withing a given scope (series of nodes at the same depth within either an + // Within a given scope (series of nodes at the same depth within either an // array or an object), we move forward. // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { // and [. At the object ({) or at the array ([), you can issue a "down" to @@ -7275,7 +7450,7 @@ inline void document_stream::start() noexcept { // Always run the first stage 1 parse immediately batch_start = 0; error = run_stage1(*parser, batch_start); - if(error == EMPTY) { + while(error == EMPTY) { // In exceptional cases, we may start with an empty block batch_start = next_batch_start(); if (batch_start >= len) { return; } @@ -7292,7 +7467,6 @@ inline void document_stream::start() noexcept { if (error) { return; } } #endif // SIMDJSON_THREADS_ENABLED - next(); } @@ -7301,13 +7475,20 @@ simdjson_really_inline size_t document_stream::iterator::current_index() const n } simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept { - size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index]; - return std::string_view(reinterpret_cast(stream->buf) + current_index(), next_doc_index - current_index() - 1); + const char* start = reinterpret_cast(stream->buf) + current_index(); + bool object_or_array = ((*start == '[') || (*start == '{')); + if(object_or_array) { + size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index - 1]; + return std::string_view(start, next_doc_index - current_index() + 1); + } else { + size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index]; + return std::string_view(reinterpret_cast(stream->buf) + current_index(), next_doc_index - current_index() - 1); + } } inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. + // We always exit at once, once in an error condition. if (error) { return; } // Load the next document from the batch @@ -7333,18 +7514,25 @@ inline void document_stream::next() noexcept { error = parser->implementation->stage2_next(parser->doc); } } +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} inline size_t document_stream::next_batch_start() const noexcept { return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } inline error_code document_stream::run_stage1(dom::parser &p, size_t _batch_start) noexcept { - // If this is the final batch, pass partial = false size_t remaining = len - _batch_start; if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, false); + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, true); + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); } } @@ -7450,7 +7638,7 @@ inline error_code document::allocate(size_t capacity) noexcept { // need a capacity of at least capacity + 1, but it is also possible to do // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" //where capacity + 1 tape elements are - // generated, see issue https://github.com/lemire/simdjson/issues/345 + // generated, see issue https://github.com/simdjson/simdjson/issues/345 size_t tape_capacity = SIMDJSON_ROUNDUP_N(capacity + 3, 64); // a document with only zero-length strings... could have capacity/3 string // and we would need capacity/3 * 5 bytes on the string buffer @@ -8408,13 +8596,18 @@ inline simdjson_result parser::parse_into_document(document& provided_d // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! error_code _error = ensure_capacity(provided_doc, len); if (_error) { return _error; } - std::unique_ptr tmp_buf; if (realloc_if_needed) { - tmp_buf.reset(reinterpret_cast( internal::allocate_padded_buffer(len) )); - if (tmp_buf.get() == nullptr) { return MEMALLOC; } - std::memcpy(static_cast(tmp_buf.get()), buf, len); + // Make sure we have enough capacity to copy len bytes + if (!loaded_bytes || _loaded_bytes_capacity < len) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + std::memcpy(static_cast(loaded_bytes.get()), buf, len); } - _error = implementation->parse(realloc_if_needed ? tmp_buf.get() : buf, len, provided_doc); + _error = implementation->parse(realloc_if_needed ? reinterpret_cast(loaded_bytes.get()): buf, len, provided_doc); if (_error) { return _error; } @@ -9332,7 +9525,7 @@ class dom_parser_implementation final : public internal::dom_parser_implementati dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; @@ -9438,6 +9631,40 @@ simdjson_really_inline int count_ones(uint64_t input_num) { return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); } + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. + */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_really_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +NO_SANITIZE_UNDEFINED +simdjson_really_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO *result = value1 + value2; @@ -9553,6 +9780,19 @@ simdjson_really_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint return x; } +simdjson_really_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) { + uint8x8_t x{}; + x = vset_lane_u8(x1, x, 0); + x = vset_lane_u8(x2, x, 1); + x = vset_lane_u8(x3, x, 2); + x = vset_lane_u8(x4, x, 3); + x = vset_lane_u8(x5, x, 4); + x = vset_lane_u8(x6, x, 5); + x = vset_lane_u8(x7, x, 6); + x = vset_lane_u8(x8, x, 7); + return x; +} // We have to do the same work for make_int8x16_t simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, @@ -9785,6 +10025,27 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x vst1q_u8(reinterpret_cast(output), answer); } + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. + template + simdjson_really_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + template simdjson_really_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, @@ -9860,7 +10121,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x // Explicit conversion to/from unsigned // // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. - // In theory, we could check this occurence with std::same_as and std::enabled_if but it is C++14 + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 // and relatively ugly and hard to read. #ifndef SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} @@ -9935,11 +10196,15 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x } - simdjson_really_inline void compress(uint64_t mask, T * output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; } simdjson_really_inline uint64_t to_bitmask() const { @@ -10386,13 +10651,25 @@ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t } // namespace arm64 } // namespace simdjson -#define SWAR_NUMBER_PARSING +#define SIMDJSON_SWAR_NUMBER_PARSING 1 /* begin file include/simdjson/generic/numberparsing.h */ #include namespace simdjson { namespace arm64 { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + namespace { /// @private namespace numberparsing { @@ -10551,8 +10828,8 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason aboutthe product: there - // 0 or 1 leading zero in the product. + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are @@ -10667,7 +10944,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinte value!!! We could actually throw an error here if we could. + // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); @@ -10695,6 +10972,20 @@ static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's @@ -10743,14 +11034,16 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; -#ifdef SWAR_NUMBER_PARSING +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } -#endif +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } @@ -10817,9 +11110,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } + while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' return digit_count - size_t(start - start_digits); } @@ -10830,7 +11121,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // - // 9999999999999999999 < 2**64 so we can accomodate 19 digits. + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { @@ -10889,7 +11180,12 @@ simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } - +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -11099,14 +11395,59 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( return i; } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; // - // Check for minus sign + // Parse the integer part. // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; // // Parse the integer part. // @@ -11118,14 +11459,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); - // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) + // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... @@ -11133,13 +11472,9 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INCORRECT_TYPE; } - return ~i+1; + if (*p != '"') { return NUMBER_ERROR; } + if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. @@ -11152,45 +11487,173 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } - return negative ? (~i+1) : i; + return i; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + const uint8_t *p = src + negative; // // Parse the integer part. // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - // Overflow check. More than 19 digits (minus the decimal) may be overflow. + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows @@ -11235,80 +11698,273 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons } return d; } -} //namespace {} -#endif // SIMDJSON_SKIPNUMBERPARSING -} // namespace numberparsing -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file include/simdjson/generic/numberparsing.h */ +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} -#endif // SIMDJSON_ARM64_NUMBERPARSING_H -/* end file include/simdjson/arm64/numberparsing.h */ -/* begin file include/simdjson/arm64/end.h */ -/* end file include/simdjson/arm64/end.h */ +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} -#endif // SIMDJSON_IMPLEMENTATION_ARM64 +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + return ondemand::number_type::floating_point_number; +} -#endif // SIMDJSON_ARM64_H -/* end file include/simdjson/arm64.h */ -/* begin file include/simdjson/fallback.h */ -#ifndef SIMDJSON_FALLBACK_H -#define SIMDJSON_FALLBACK_H +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -#if SIMDJSON_IMPLEMENTATION_FALLBACK + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -namespace simdjson { -/** - * Fallback implementation (runs on any machine). - */ -namespace fallback { -} // namespace fallback -} // namespace simdjson + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } -/* begin file include/simdjson/fallback/implementation.h */ -#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H -#define SIMDJSON_FALLBACK_IMPLEMENTATION_H + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -namespace simdjson { -namespace fallback { + exponent += exp_neg ? 0-exp : exp; + } -namespace { -using namespace simdjson; -using namespace simdjson::dom; -} + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } -class implementation final : public simdjson::implementation { -public: - simdjson_really_inline implementation() : simdjson::implementation( - "fallback", - "Generic fallback implementation", - 0 - ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; -} // namespace fallback -} // namespace simdjson + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} -#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H -/* end file include/simdjson/fallback/implementation.h */ +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += negative + 1; -/* begin file include/simdjson/fallback/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "fallback" -// #define SIMDJSON_IMPLEMENTATION fallback -/* end file include/simdjson/fallback/begin.h */ + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -// Declarations + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_ARM64_NUMBERPARSING_H +/* end file include/simdjson/arm64/numberparsing.h */ +/* begin file include/simdjson/arm64/end.h */ +/* end file include/simdjson/arm64/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_ARM64 + +#endif // SIMDJSON_ARM64_H +/* end file include/simdjson/arm64.h */ +/* begin file include/simdjson/fallback.h */ +#ifndef SIMDJSON_FALLBACK_H +#define SIMDJSON_FALLBACK_H + + +#if SIMDJSON_IMPLEMENTATION_FALLBACK + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). + */ +namespace fallback { +} // namespace fallback +} // namespace simdjson + +/* begin file include/simdjson/fallback/implementation.h */ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H + + +namespace simdjson { +namespace fallback { + +namespace { +using namespace simdjson; +using namespace simdjson::dom; +} + +class implementation final : public simdjson::implementation { +public: + simdjson_really_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H +/* end file include/simdjson/fallback/implementation.h */ + +/* begin file include/simdjson/fallback/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "fallback" +// #define SIMDJSON_IMPLEMENTATION fallback +/* end file include/simdjson/fallback/begin.h */ + +// Declarations /* begin file include/simdjson/generic/dom_parser_implementation.h */ namespace simdjson { @@ -11342,7 +11998,7 @@ class dom_parser_implementation final : public internal::dom_parser_implementati dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; @@ -11826,12 +12482,25 @@ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t } // namespace fallback } // namespace simdjson -#define SWAR_NUMBER_PARSING +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + /* begin file include/simdjson/generic/numberparsing.h */ #include namespace simdjson { namespace fallback { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + namespace { /// @private namespace numberparsing { @@ -11990,8 +12659,8 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason aboutthe product: there - // 0 or 1 leading zero in the product. + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are @@ -12106,7 +12775,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinte value!!! We could actually throw an error here if we could. + // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); @@ -12134,6 +12803,20 @@ static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's @@ -12182,14 +12865,16 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; -#ifdef SWAR_NUMBER_PARSING +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } -#endif +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } @@ -12256,9 +12941,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } + while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' return digit_count - size_t(start - start_digits); } @@ -12269,7 +12952,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // - // 9999999999999999999 < 2**64 so we can accomodate 19 digits. + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { @@ -12328,7 +13011,12 @@ simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } - +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -12538,14 +13226,59 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( return i; } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; // - // Check for minus sign + // Parse the integer part. // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; // // Parse the integer part. // @@ -12557,14 +13290,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); - // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) + // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... @@ -12572,13 +13303,9 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INCORRECT_TYPE; } - return ~i+1; + if (*p != '"') { return NUMBER_ERROR; } + if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. @@ -12591,39 +13318,167 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } - return negative ? (~i+1) : i; + return i; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + const uint8_t *p = src + negative; // // Parse the integer part. // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // - // Parse the decimal part. + // Check for minus sign // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } @@ -12674,6 +13529,199 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons } return d; } + +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += negative + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} } //namespace {} #endif // SIMDJSON_SKIPNUMBERPARSING @@ -12846,7 +13894,7 @@ class dom_parser_implementation final : public internal::dom_parser_implementati dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; @@ -13288,11 +14336,12 @@ namespace simd { simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} - simdjson_really_inline void compress(uint64_t mask, T * output) const { + simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { uint32_t mask1 = uint32_t(mask); uint32_t mask2 = uint32_t(mask >> 32); this->chunks[0].compress(mask1, output); this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); } simdjson_really_inline void store(T ptr[64]) const { @@ -13749,13 +14798,25 @@ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t } // namespace haswell } // namespace simdjson -#define SWAR_NUMBER_PARSING +#define SIMDJSON_SWAR_NUMBER_PARSING 1 /* begin file include/simdjson/generic/numberparsing.h */ #include namespace simdjson { namespace haswell { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + namespace { /// @private namespace numberparsing { @@ -13914,8 +14975,8 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason aboutthe product: there - // 0 or 1 leading zero in the product. + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are @@ -14030,7 +15091,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinte value!!! We could actually throw an error here if we could. + // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); @@ -14058,12 +15119,26 @@ static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); @@ -14106,14 +15181,16 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; -#ifdef SWAR_NUMBER_PARSING +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } -#endif +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } @@ -14180,9 +15257,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } + while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' return digit_count - size_t(start - start_digits); } @@ -14193,7 +15268,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // - // 9999999999999999999 < 2**64 so we can accomodate 19 digits. + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { @@ -14252,7 +15327,12 @@ simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } - +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -14462,14 +15542,59 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( return i; } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; // - // Check for minus sign + // Parse the integer part. // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; // // Parse the integer part. // @@ -14481,14 +15606,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); - // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) + // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... @@ -14496,13 +15619,9 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INCORRECT_TYPE; } - return ~i+1; + if (*p != '"') { return NUMBER_ERROR; } + if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. @@ -14515,9 +15634,137 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } return negative ? (~i+1) : i; } @@ -14598,46 +15845,239 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons } return d; } -} //namespace {} -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file include/simdjson/generic/numberparsing.h */ - -#endif // SIMDJSON_HASWELL_NUMBERPARSING_H -/* end file include/simdjson/haswell/numberparsing.h */ -/* begin file include/simdjson/haswell/end.h */ -SIMDJSON_UNTARGET_HASWELL -/* end file include/simdjson/haswell/end.h */ -#endif // SIMDJSON_IMPLEMENTATION_HASWELL -#endif // SIMDJSON_HASWELL_COMMON_H -/* end file include/simdjson/haswell.h */ -/* begin file include/simdjson/ppc64.h */ -#ifndef SIMDJSON_PPC64_H -#define SIMDJSON_PPC64_H +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} -#if SIMDJSON_IMPLEMENTATION_PPC64 +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + return ondemand::number_type::floating_point_number; +} -namespace simdjson { -/** - * Implementation for ALTIVEC (PPC64). - */ -namespace ppc64 { -} // namespace ppc64 -} // namespace simdjson +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; -/* begin file include/simdjson/ppc64/implementation.h */ -#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H -#define SIMDJSON_PPC64_IMPLEMENTATION_H + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -namespace simdjson { -namespace ppc64 { + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += negative + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_H +/* end file include/simdjson/haswell/numberparsing.h */ +/* begin file include/simdjson/haswell/end.h */ +SIMDJSON_UNTARGET_HASWELL +/* end file include/simdjson/haswell/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_HASWELL +#endif // SIMDJSON_HASWELL_COMMON_H +/* end file include/simdjson/haswell.h */ +/* begin file include/simdjson/ppc64.h */ +#ifndef SIMDJSON_PPC64_H +#define SIMDJSON_PPC64_H + + +#if SIMDJSON_IMPLEMENTATION_PPC64 + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { +} // namespace ppc64 +} // namespace simdjson + +/* begin file include/simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H + + +namespace simdjson { +namespace ppc64 { namespace { using namespace simdjson; @@ -14705,7 +16145,7 @@ class dom_parser_implementation final : public internal::dom_parser_implementati dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; @@ -15310,7 +16750,7 @@ template struct simd8x64 { (this->chunks[2] | this->chunks[3]); } - simdjson_really_inline void compress(uint64_t mask, T *output) const { + simdjson_really_inline uint64_t compress(uint64_t mask, T *output) const { this->chunks[0].compress(uint16_t(mask), output); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); @@ -15318,6 +16758,7 @@ template struct simd8x64 { output + 32 - count_ones(mask & 0xFFFFFFFF)); this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); } simdjson_really_inline uint64_t to_bitmask() const { @@ -15772,13 +17213,25 @@ parse_eight_digits_unrolled(const uint8_t *chars) { } // namespace ppc64 } // namespace simdjson -#define SWAR_NUMBER_PARSING +#define SIMDJSON_SWAR_NUMBER_PARSING 1 /* begin file include/simdjson/generic/numberparsing.h */ #include namespace simdjson { namespace ppc64 { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + namespace { /// @private namespace numberparsing { @@ -15937,8 +17390,8 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason aboutthe product: there - // 0 or 1 leading zero in the product. + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are @@ -16053,7 +17506,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinte value!!! We could actually throw an error here if we could. + // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); @@ -16081,6 +17534,20 @@ static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's @@ -16129,14 +17596,16 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; -#ifdef SWAR_NUMBER_PARSING +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } -#endif +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } @@ -16203,9 +17672,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } + while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' return digit_count - size_t(start - start_digits); } @@ -16216,7 +17683,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // - // 9999999999999999999 < 2**64 so we can accomodate 19 digits. + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { @@ -16275,7 +17742,12 @@ simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } - +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -16485,14 +17957,59 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( return i; } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; // - // Check for minus sign + // Parse the integer part. // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; // // Parse the integer part. // @@ -16504,14 +18021,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); - // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) + // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... @@ -16519,13 +18034,9 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INCORRECT_TYPE; } - return ~i+1; + if (*p != '"') { return NUMBER_ERROR; } + if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. @@ -16538,45 +18049,173 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } - return negative ? (~i+1) : i; + return i; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + const uint8_t *p = src + negative; // // Parse the integer part. // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - // Overflow check. More than 19 digits (minus the decimal) may be overflow. + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. overflow = p-src-1 > 19; if (simdjson_unlikely(overflow && leading_zero)) { // Skip leading 0.00000 and see if it still overflows @@ -16621,80 +18260,273 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons } return d; } -} //namespace {} -#endif // SIMDJSON_SKIPNUMBERPARSING -} // namespace numberparsing -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file include/simdjson/generic/numberparsing.h */ +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} -#endif // SIMDJSON_PPC64_NUMBERPARSING_H -/* end file include/simdjson/ppc64/numberparsing.h */ -/* begin file include/simdjson/ppc64/end.h */ -/* end file include/simdjson/ppc64/end.h */ +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} -#endif // SIMDJSON_IMPLEMENTATION_PPC64 +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + return ondemand::number_type::floating_point_number; +} -#endif // SIMDJSON_PPC64_H -/* end file include/simdjson/ppc64.h */ -/* begin file include/simdjson/westmere.h */ -#ifndef SIMDJSON_WESTMERE_H -#define SIMDJSON_WESTMERE_H +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -#if SIMDJSON_IMPLEMENTATION_WESTMERE + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -#if SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -#define SIMDJSON_TARGET_WESTMERE -#define SIMDJSON_UNTARGET_WESTMERE -#else -#define SIMDJSON_TARGET_WESTMERE SIMDJSON_TARGET_REGION("sse4.2,pclmul") -#define SIMDJSON_UNTARGET_WESTMERE SIMDJSON_UNTARGET_REGION -#endif + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } -namespace simdjson { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere { -} // namespace westmere -} // namespace simdjson + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -// -// These two need to be included outside SIMDJSON_TARGET_WESTMERE -// -/* begin file include/simdjson/westmere/implementation.h */ -#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H -#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + exponent += exp_neg ? 0-exp : exp; + } -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE -namespace simdjson { -namespace westmere { + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } -namespace { -using namespace simdjson; -using namespace simdjson::dom; + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; } -class implementation final : public simdjson::implementation { -public: - simdjson_really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += negative + 1; -} // namespace westmere -} // namespace simdjson + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_PPC64_NUMBERPARSING_H +/* end file include/simdjson/ppc64/numberparsing.h */ +/* begin file include/simdjson/ppc64/end.h */ +/* end file include/simdjson/ppc64/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_PPC64 + +#endif // SIMDJSON_PPC64_H +/* end file include/simdjson/ppc64.h */ +/* begin file include/simdjson/westmere.h */ +#ifndef SIMDJSON_WESTMERE_H +#define SIMDJSON_WESTMERE_H + + +#if SIMDJSON_IMPLEMENTATION_WESTMERE + +#if SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_TARGET_WESTMERE +#define SIMDJSON_UNTARGET_WESTMERE +#else +#define SIMDJSON_TARGET_WESTMERE SIMDJSON_TARGET_REGION("sse4.2,pclmul") +#define SIMDJSON_UNTARGET_WESTMERE SIMDJSON_UNTARGET_REGION +#endif + +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { +} // namespace westmere +} // namespace simdjson + +// +// These two need to be included outside SIMDJSON_TARGET_WESTMERE +// +/* begin file include/simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +namespace { +using namespace simdjson; +using namespace simdjson::dom; +} + +class implementation final : public simdjson::implementation { +public: + simdjson_really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H /* end file include/simdjson/westmere/implementation.h */ /* begin file include/simdjson/westmere/intrinsics.h */ #ifndef SIMDJSON_WESTMERE_INTRINSICS_H @@ -16769,7 +18601,7 @@ class dom_parser_implementation final : public internal::dom_parser_implementati dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, bool partial) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; @@ -17201,11 +19033,12 @@ namespace simd { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - simdjson_really_inline void compress(uint64_t mask, T * output) const { + simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { this->chunks[0].compress(uint16_t(mask), output); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); } simdjson_really_inline uint64_t to_bitmask() const { @@ -17653,13 +19486,25 @@ static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t } // namespace westmere } // namespace simdjson -#define SWAR_NUMBER_PARSING +#define SIMDJSON_SWAR_NUMBER_PARSING 1 /* begin file include/simdjson/generic/numberparsing.h */ #include namespace simdjson { namespace westmere { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + namespace { /// @private namespace numberparsing { @@ -17818,8 +19663,8 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason aboutthe product: there - // 0 or 1 leading zero in the product. + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. // Unless the least significant 9 bits of the high (64-bit) part of the full // product are all 1s, then we know that the most significant 55 bits are @@ -17934,7 +19779,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg mantissa &= ~(1ULL << 52); // we have to check that real_exponent is in range, otherwise we bail out if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinte value!!! We could actually throw an error here if we could. + // We have an infinite value!!! We could actually throw an error here if we could. return false; } d = to_double(mantissa, real_exponent, negative); @@ -17962,6 +19807,20 @@ static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's @@ -18010,14 +19869,16 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c // the integer into a float in a lossless manner. const uint8_t *const first_after_period = p; -#ifdef SWAR_NUMBER_PARSING +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING // this helps if we have lots of decimals! // this turns out to be frequent enough. if (is_made_of_eight_digits_fast(p)) { i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } -#endif +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) if (parse_digit(*p, i)) { ++p; } while (parse_digit(*p, i)) { p++; } @@ -18084,9 +19945,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { - start++; - } + while ((*start == '0') || (*start == '.')) { ++start; } // we over-decrement by one when there is a '.' return digit_count - size_t(start - start_digits); } @@ -18097,7 +19956,7 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. // - // 9999999999999999999 < 2**64 so we can accomodate 19 digits. + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. // If we have a decimal separator, then digit_count - 1 is the number of digits, but we // may not have a decimal separator! if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { @@ -18156,7 +20015,12 @@ simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } - +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -18366,14 +20230,59 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( return i; } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; // - // Check for minus sign + // Parse the integer part. // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; // // Parse the integer part. // @@ -18385,14 +20294,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. size_t digit_count = size_t(p - start_digits); - // The longest negative 64-bit number is 19 digits. // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) + // ((digit_count == 0) || (digit_count > 20)) // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... @@ -18400,13 +20307,9 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INCORRECT_TYPE; } - return ~i+1; + if (*p != '"') { return NUMBER_ERROR; } + if (digit_count == 20) { // Positive overflow check: // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the // biggest uint64_t. @@ -18419,40 +20322,168 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // - Therefore, if the number is positive and lower than that, it's overflow. // - The value we are looking at is less than or equal to 9,223,372,036,854,775,808 (INT64_MAX). // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } - return negative ? (~i+1) : i; + return i; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + const uint8_t *p = src + negative; // // Parse the integer part. // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // - // Parse the decimal part. + // Check for minus sign // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits p++; while (parse_digit(*p, i)) { p++; } exponent = -(p - start_decimal_digits); @@ -18502,6 +20533,199 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons } return d; } + +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += negative + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} } //namespace {} #endif // SIMDJSON_SKIPNUMBERPARSING @@ -18651,22 +20875,28 @@ struct implementation_simdjson_result_base { */ simdjson_really_inline operator T&&() && noexcept(false); + +#endif // SIMDJSON_EXCEPTIONS + /** * Get the result value. This function is safe if and only - * the error() method returns a value that evoluates to false. + * the error() method returns a value that evaluates to false. */ simdjson_really_inline const T& value_unsafe() const& noexcept; - + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_really_inline T& value_unsafe() & noexcept; /** * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evoluates to false. + * the error() method returns a value that evaluates to false. */ simdjson_really_inline T&& value_unsafe() && noexcept; - -#endif // SIMDJSON_EXCEPTIONS - - T first{}; - error_code second{UNINITIALIZED}; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ }; // struct implementation_simdjson_result_base } // namespace SIMDJSON_BUILTIN_IMPLEMENTATION @@ -18693,7 +20923,6 @@ using depth_t = int32_t; namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - /** * The type of a JSON value. */ @@ -18707,6 +20936,106 @@ enum class json_type { null ///< A JSON null (null) }; +class value_iterator; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_really_inline number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_really_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_really_inline uint64_t get_uint64() const noexcept; + simdjson_really_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_really_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_really_inline int64_t get_int64() const noexcept; + simdjson_really_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_really_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_really_inline double get_double() const noexcept; + simdjson_really_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_really_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + template + friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); + /** Store a signed 64-bit value to the number. */ + simdjson_really_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_really_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_really_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_really_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; + friend class value_iterator; +}; + /** * Write the JSON type to the output stream * @@ -18714,6 +21043,7 @@ enum class json_type { * @param type The json_type. */ inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -18773,23 +21103,26 @@ namespace logger { static constexpr const bool LOG_ENABLED = false; #endif -static simdjson_really_inline void log_headers() noexcept; -static simdjson_really_inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; -static simdjson_really_inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; -static simdjson_really_inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; -static simdjson_really_inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; -static simdjson_really_inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; -static simdjson_really_inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; -static simdjson_really_inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; -static simdjson_really_inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; -static simdjson_really_inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; -static simdjson_really_inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; - -static simdjson_really_inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; -static simdjson_really_inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; -static simdjson_really_inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; -static simdjson_really_inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; -static simdjson_really_inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; } // namespace logger } // namespace ondemand @@ -18873,7 +21206,7 @@ class raw_json_string { * long strings. * * If target is a compile-time constant, and your compiler likes you, - * you should be able to do the following without performance penatly... + * you should be able to do the following without performance penalty... * * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); @@ -18887,7 +21220,7 @@ class raw_json_string { * the caller is responsible for this check. See is_free_from_unescaped_quote. * * If target is a compile-time constant, and your compiler likes you, - * you should be able to do the following without performance penatly... + * you should be able to do the following without performance penalty... * * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); @@ -19018,11 +21351,12 @@ class token_iterator { /** * Advance to the next token (returning the current one). - * - * Does not check or update depth/expect_value. Caller is responsible for that. */ - simdjson_really_inline const uint8_t *advance() noexcept; - + simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_really_inline uint32_t current_offset() const noexcept; /** * Get the JSON text for a given token (relative). * @@ -19052,8 +21386,6 @@ class token_iterator { * * @param position The position of the token. * - * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... */ simdjson_really_inline const uint8_t *peek(token_position position) const noexcept; /** @@ -19066,13 +21398,13 @@ class token_iterator { simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; /** - * Save the current index to be restored later. + * Return the current index. */ simdjson_really_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ - simdjson_really_inline void set_position(token_position target_checkpoint) noexcept; + simdjson_really_inline void set_position(token_position target_position) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. @@ -19085,7 +21417,7 @@ class token_iterator { simdjson_really_inline bool operator<=(const token_iterator &other) const noexcept; protected: - simdjson_really_inline token_iterator(const uint8_t *buf, token_position index) noexcept; + simdjson_really_inline token_iterator(const uint8_t *buf, token_position position) noexcept; /** * Get the index of the JSON text for a given token (relative). @@ -19107,7 +21439,7 @@ class token_iterator { simdjson_really_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t *buf{}; - token_position index{}; + token_position _position{}; friend class json_iterator; friend class value_iterator; @@ -19139,6 +21471,7 @@ namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { class document; +class document_stream; class object; class array; class value; @@ -19180,14 +21513,27 @@ class json_iterator { * - 3 = key or value inside root array/object. */ depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; public: simdjson_really_inline json_iterator() noexcept = default; simdjson_really_inline json_iterator(json_iterator &&other) noexcept; simdjson_really_inline json_iterator &operator=(json_iterator &&other) noexcept; - simdjson_really_inline json_iterator(const json_iterator &other) noexcept = delete; - simdjson_really_inline json_iterator &operator=(const json_iterator &other) noexcept = delete; - + simdjson_really_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_really_inline json_iterator &operator=(const json_iterator &other) noexcept = default; /** * Skips a JSON value, whether it is a scalar, array or object. */ @@ -19199,19 +21545,30 @@ class json_iterator { simdjson_really_inline bool at_root() const noexcept; /** - * Get the root value iterator + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). */ - simdjson_really_inline token_position root_checkpoint() const noexcept; + simdjson_really_inline bool streaming() const noexcept; /** - * Assert if the iterator is not at the start + * Get the root value iterator + */ + simdjson_really_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_really_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document */ simdjson_really_inline void assert_at_root() const noexcept; /** * Tell whether the iterator is at the EOF mark */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_really_inline bool at_end() const noexcept; /** * Tell whether the iterator is live (has not been moved). @@ -19224,10 +21581,22 @@ class json_iterator { simdjson_really_inline void abandon() noexcept; /** - * Advance the current token. + * Advance the current token without modifying depth. */ - simdjson_really_inline const uint8_t *advance() noexcept; + simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_really_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_really_inline void assert_valid_position(token_position position) const noexcept; /** * Get the JSON text for a given token (relative). * @@ -19247,12 +21616,21 @@ class json_iterator { * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. */ simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to derefence this pointer. + */ + simdjson_really_inline const uint8_t *unsafe_pointer() const noexcept; /** * Get the JSON text for a given token. * * This is not null-terminated; it is a view into the JSON. * - * @param index The position of the token to retrieve. + * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... @@ -19263,7 +21641,7 @@ class json_iterator { * * The length will include any whitespace at the end of the token. * - * @param index The position of the token to retrieve. + * @param position The position of the token to retrieve. */ simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; /** @@ -19292,8 +21670,8 @@ class json_iterator { * * @param child_depth the expected child depth. */ - simdjson_really_inline void descend_to(depth_t parent_depth) noexcept; - simdjson_really_inline void descend_to(depth_t parent_depth, int32_t delta) noexcept; + simdjson_really_inline void descend_to(depth_t child_depth) noexcept; + simdjson_really_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; /** * Get current depth. @@ -19306,7 +21684,7 @@ class json_iterator { simdjson_really_inline uint8_t *&string_buf_loc() noexcept; /** - * Report an error, preventing further iteration. + * Report an unrecoverable error, preventing further iteration. * * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. @@ -19321,8 +21699,6 @@ class json_iterator { simdjson_really_inline error_code optional_error(error_code error, const char *message) noexcept; template simdjson_warn_unused simdjson_really_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept; - template simdjson_warn_unused simdjson_really_inline bool peek_to_buffer(uint8_t (&tmpbuf)[N]) noexcept; - template simdjson_warn_unused simdjson_really_inline bool advance_to_buffer(uint8_t (&tmpbuf)[N]) noexcept; simdjson_really_inline token_position position() const noexcept; simdjson_really_inline void reenter_child(token_position position, depth_t child_depth) noexcept; @@ -19330,12 +21706,30 @@ class json_iterator { simdjson_really_inline token_position start_position(depth_t depth) const noexcept; simdjson_really_inline void set_start_position(depth_t depth, token_position position) noexcept; #endif + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() noexcept; + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; protected: simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; - simdjson_really_inline token_position last_document_position() const noexcept; + /// The last token before the end + simdjson_really_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_really_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_really_inline token_position end() const noexcept; friend class document; + friend class document_stream; friend class object; friend class array; friend class value; @@ -19391,8 +21785,6 @@ class value_iterator { depth_t _depth{}; /** * The starting token index for this value - * - * PERF NOTE: this is a safety check; we expect this to be elided in release builds. */ token_position _start_position{}; @@ -19414,7 +21806,7 @@ class value_iterator { /** * Tell whether the iterator is at the EOF mark */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_really_inline bool at_end() const noexcept; /** * Tell whether the iterator is at the start of the value @@ -19451,7 +21843,7 @@ class value_iterator { * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ - simdjson_really_inline simdjson_result type() noexcept; + simdjson_really_inline simdjson_result type() const noexcept; /** * @addtogroup object Object iteration @@ -19481,11 +21873,23 @@ class value_iterator { /** * Start an object iteration after the user has already checked and moved past the {. * - * Does not move the iterator. + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_really_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline bool started_object() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result started_root_object() noexcept; /** * Moves to the next field in an object. @@ -19495,6 +21899,7 @@ class value_iterator { * * @return whether there is another field in the object. * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. */ simdjson_warn_unused simdjson_really_inline simdjson_result has_next_field() noexcept; @@ -19591,13 +21996,25 @@ class value_iterator { simdjson_warn_unused simdjson_really_inline simdjson_result start_root_array() noexcept; /** - * Start an array iteration after the user has already checked and moved past the [. + * Start an array iteration, after the user has already checked and moved past the [. * - * Does not move the iterator. + * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline bool started_array() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_really_inline simdjson_result started_root_array() noexcept; /** * Moves to the next element in an array. @@ -19626,17 +22043,31 @@ class value_iterator { simdjson_warn_unused simdjson_really_inline simdjson_result get_string() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_raw_json_string() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_int64_in_string() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_double_in_string() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_bool() noexcept; simdjson_really_inline bool is_null() noexcept; + simdjson_warn_unused simdjson_really_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_root_string() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_root_raw_json_string() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64_in_string() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64_in_string() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_root_double() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_double_in_string() noexcept; simdjson_warn_unused simdjson_really_inline simdjson_result get_root_bool() noexcept; + simdjson_warn_unused simdjson_really_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result is_root_integer() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_number_type() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_number() noexcept; simdjson_really_inline bool is_root_null() noexcept; simdjson_really_inline error_code error() const noexcept; @@ -19648,31 +22079,130 @@ class value_iterator { simdjson_really_inline bool is_valid() const noexcept; /** @} */ - protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_really_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_really_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_really_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_really_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; simdjson_really_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; simdjson_really_inline bool parse_null(const uint8_t *json) const noexcept; simdjson_really_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; - simdjson_really_inline const uint8_t *peek_start() const noexcept; simdjson_really_inline uint32_t peek_start_length() const noexcept; - simdjson_really_inline const uint8_t *advance_start(const char *type) const noexcept; - simdjson_really_inline error_code advance_container_start(const char *type, const uint8_t *&json) const noexcept; - simdjson_really_inline const uint8_t *advance_root_scalar(const char *type) const noexcept; - simdjson_really_inline const uint8_t *advance_non_root_scalar(const char *type) const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_really_inline void advance_scalar(const char *type) noexcept; + simdjson_really_inline void advance_root_scalar(const char *type) noexcept; + simdjson_really_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_really_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_really_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_really_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_really_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_really_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_really_inline simdjson_result advance_to_value() noexcept; simdjson_really_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_really_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_really_inline bool is_at_start() const noexcept; - simdjson_really_inline bool is_at_container_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ simdjson_really_inline bool is_at_iterator_start() const noexcept; - simdjson_really_inline void assert_at_start() const noexcept; - simdjson_really_inline void assert_at_container_start() const noexcept; - simdjson_really_inline void assert_at_root() const noexcept; - simdjson_really_inline void assert_at_child() const noexcept; - simdjson_really_inline void assert_at_next() const noexcept; - simdjson_really_inline void assert_at_non_root_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_really_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_really_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_really_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_really_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_really_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_really_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; @@ -19894,8 +22424,91 @@ class array { * Part of the std::iterable interface. */ simdjson_really_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_really_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_really_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_really_inline simdjson_result raw_json() noexcept; + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once as the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_really_inline simdjson_result at(size_t index) noexcept; protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_really_inline error_code consume() noexcept; + /** * Begin array iteration. * @@ -19921,7 +22534,7 @@ class array { * * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. */ - static simdjson_really_inline array started(value_iterator &iter) noexcept; + static simdjson_really_inline simdjson_result started(value_iterator &iter) noexcept; /** * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. @@ -19961,6 +22574,11 @@ struct simdjson_result : publi simdjson_really_inline simdjson_result begin() noexcept; simdjson_really_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) noexcept; + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson @@ -19977,9 +22595,10 @@ class object; class value; class raw_json_string; class array_iterator; +class document_stream; /** - * A JSON document iteration. + * A JSON document. It holds a json_iterator instance. * * Used by tokens to get text, and string buffer location. * @@ -19993,7 +22612,7 @@ class document { * Exists so you can declare a variable and later assign to it before use. */ simdjson_really_inline document() noexcept = default; - simdjson_really_inline document(const document &other) noexcept = delete; + simdjson_really_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy simdjson_really_inline document(document &&other) noexcept = default; simdjson_really_inline document &operator=(const document &other) noexcept = delete; simdjson_really_inline document &operator=(document &&other) noexcept = default; @@ -20019,6 +22638,13 @@ class document { * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_really_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * @@ -20026,6 +22652,13 @@ class document { * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ simdjson_really_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_really_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * @@ -20033,6 +22666,14 @@ class document { * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ simdjson_really_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_really_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * @@ -20059,6 +22700,14 @@ class document { * @returns INCORRECT_TYPE if the JSON value is not true or false. */ simdjson_really_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * @returns A value if a JSON array or object cannot be found. + * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_really_inline simdjson_result get_value() noexcept; + /** * Checks if this JSON value is null. * @@ -20092,7 +22741,9 @@ class document { /** * Get this value as the given type. * - * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. @@ -20164,8 +22815,49 @@ class document { * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ simdjson_really_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value. + * + * @returns A value value. + * @exception if a JSON value cannot be found + */ + simdjson_really_inline operator value() noexcept(false); #endif - + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_really_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_really_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once as the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_really_inline simdjson_result at(size_t index) & noexcept; /** * Begin array iteration. * @@ -20241,6 +22933,77 @@ class document { */ simdjson_really_inline simdjson_result type() noexcept; + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_really_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_really_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number if negative. + */ + simdjson_really_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function req + * uires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_really_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * a integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; + /** * Get the raw JSON for this token. * @@ -20266,18 +23029,76 @@ class document { simdjson_really_inline simdjson_result raw_json_token() noexcept; /** - * Get the root value. + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. */ - simdjson_really_inline value get_root_value() noexcept; - simdjson_really_inline operator value() noexcept; + inline simdjson_result current_location() noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containg + * the JSON document. + */ + simdjson_really_inline simdjson_result raw_json() noexcept; protected: + /** + * Consumes the document. + */ + simdjson_really_inline error_code consume() noexcept; + simdjson_really_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept; simdjson_really_inline value_iterator resume_value_iterator() noexcept; simdjson_really_inline value_iterator get_root_value_iterator() noexcept; - simdjson_really_inline value resume_value() noexcept; + simdjson_really_inline simdjson_result start_or_resume_object() noexcept; static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept; // @@ -20286,7 +23107,6 @@ class document { json_iterator iter{}; ///< Current position in the document static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 - friend struct simdjson_result; friend class array_iterator; friend class value; friend class ondemand::parser; @@ -20294,13 +23114,74 @@ class document { friend class array; friend class field; friend class token; + friend class document_stream; }; -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { + +/** + * A document_reference is a thin wrapper around a document reference instance. + */ +class document_reference { +public: + simdjson_really_inline document_reference() noexcept; + simdjson_really_inline document_reference(document &d) noexcept; + simdjson_really_inline document_reference(const document_reference &other) noexcept = default; + simdjson_really_inline void rewind() noexcept; + simdjson_really_inline simdjson_result get_array() & noexcept; + simdjson_really_inline simdjson_result get_object() & noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline simdjson_result get_value() noexcept; + + simdjson_really_inline bool is_null() noexcept; + simdjson_really_inline simdjson_result raw_json() noexcept; + simdjson_really_inline operator document&() const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator array() & noexcept(false); + simdjson_really_inline operator object() & noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); + simdjson_really_inline operator double() noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_really_inline operator raw_json_string() noexcept(false); + simdjson_really_inline operator bool() noexcept(false); + simdjson_really_inline operator value() noexcept(false); +#endif + simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) & noexcept; + simdjson_really_inline simdjson_result begin() & noexcept; + simdjson_really_inline simdjson_result end() & noexcept; + simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_really_inline simdjson_result type() noexcept; + simdjson_really_inline simdjson_result is_scalar() noexcept; + + simdjson_really_inline simdjson_result current_location() noexcept; + simdjson_really_inline bool is_negative() noexcept; + simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_really_inline simdjson_result get_number() noexcept; + simdjson_really_inline simdjson_result raw_json_token() noexcept; + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { @@ -20308,15 +23189,18 @@ struct simdjson_result : pu simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value) noexcept; ///< @private simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_really_inline simdjson_result() noexcept = default; + simdjson_really_inline error_code rewind() noexcept; simdjson_really_inline simdjson_result get_array() & noexcept; simdjson_really_inline simdjson_result get_object() & noexcept; simdjson_really_inline simdjson_result get_uint64() noexcept; simdjson_really_inline simdjson_result get_int64() noexcept; simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_double_from_string() noexcept; simdjson_really_inline simdjson_result get_string() noexcept; simdjson_really_inline simdjson_result get_raw_json_string() noexcept; simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline simdjson_result get_value() noexcept; simdjson_really_inline bool is_null() noexcept; template simdjson_really_inline simdjson_result get() & noexcept; @@ -20334,8 +23218,11 @@ struct simdjson_result : pu simdjson_really_inline operator std::string_view() noexcept(false); simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); simdjson_really_inline operator bool() noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); #endif - + simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) & noexcept; simdjson_really_inline simdjson_result begin() & noexcept; simdjson_really_inline simdjson_result end() & noexcept; simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; @@ -20344,13 +23231,80 @@ struct simdjson_result : pu simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_really_inline simdjson_result type() noexcept; - + simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_really_inline simdjson_result current_location() noexcept; + simdjson_really_inline bool is_negative() noexcept; + simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_really_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_really_inline std::string_view document::raw_json_token() const noexcept */ simdjson_really_inline simdjson_result raw_json_token() noexcept; + + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) noexcept; + simdjson_really_inline simdjson_result() noexcept = default; + simdjson_really_inline error_code rewind() noexcept; + + simdjson_really_inline simdjson_result get_array() & noexcept; + simdjson_really_inline simdjson_result get_object() & noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline simdjson_result get_value() noexcept; + simdjson_really_inline bool is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); + simdjson_really_inline operator double() noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_really_inline operator bool() noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); +#endif + simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) & noexcept; + simdjson_really_inline simdjson_result begin() & noexcept; + simdjson_really_inline simdjson_result end() & noexcept; + simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_really_inline simdjson_result type() noexcept; + simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_really_inline simdjson_result current_location() noexcept; + simdjson_really_inline bool is_negative() noexcept; + simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_really_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_really_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_really_inline simdjson_result raw_json_token() noexcept; + + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; + } // namespace simdjson /* end file include/simdjson/generic/ondemand/document.h */ /* begin file include/simdjson/generic/ondemand/value.h */ @@ -20424,11 +23378,19 @@ class value { /** * Cast this JSON value to an unsigned integer. * - * @returns A signed 64-bit integer. + * @returns A unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ simdjson_really_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; + /** * Cast this JSON value to a signed integer. * @@ -20437,6 +23399,14 @@ class value { */ simdjson_really_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_really_inline simdjson_result get_int64_in_string() noexcept; + /** * Cast this JSON value to a double. * @@ -20445,6 +23415,14 @@ class value { */ simdjson_really_inline simdjson_result get_double() noexcept; + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_really_inline simdjson_result get_double_in_string() noexcept; + /** * Cast this JSON value to a string. * @@ -20563,7 +23541,26 @@ class value { * Part of the std::iterable interface. */ simdjson_really_inline simdjson_result end() & noexcept; - + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_really_inline simdjson_result count_elements() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once as the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_really_inline simdjson_result at(size_t index) noexcept; /** * Look up a field by name on an object (order-sensitive). * @@ -20633,6 +23630,85 @@ class value { */ simdjson_really_inline simdjson_result type() noexcept; + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_really_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_really_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number if negative. + */ + simdjson_really_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_really_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * a integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; + + /** * Get the raw JSON for this token. * @@ -20658,6 +23734,50 @@ class value { */ simdjson_really_inline std::string_view raw_json_token() noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceeding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + protected: /** * Create a value. @@ -20696,7 +23816,6 @@ class value { friend class field; friend class object; friend struct simdjson_result; - friend struct simdjson_result; friend struct simdjson_result; }; @@ -20717,8 +23836,11 @@ struct simdjson_result : publi simdjson_really_inline simdjson_result get_object() noexcept; simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_int64_in_string() noexcept; simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_double_in_string() noexcept; simdjson_really_inline simdjson_result get_string() noexcept; simdjson_really_inline simdjson_result get_raw_json_string() noexcept; simdjson_really_inline simdjson_result get_bool() noexcept; @@ -20738,7 +23860,8 @@ struct simdjson_result : publi simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); simdjson_really_inline operator bool() noexcept(false); #endif - + simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) noexcept; simdjson_really_inline simdjson_result begin() & noexcept; simdjson_really_inline simdjson_result end() & noexcept; @@ -20801,9 +23924,16 @@ struct simdjson_result : publi * let it throw an exception). */ simdjson_really_inline simdjson_result type() noexcept; + simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_really_inline simdjson_result is_negative() noexcept; + simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_really_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_really_inline std::string_view value::raw_json_token() const noexcept */ simdjson_really_inline simdjson_result raw_json_token() noexcept; + + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson @@ -20901,7 +24031,6 @@ class object { simdjson_really_inline simdjson_result begin() noexcept; simdjson_really_inline simdjson_result end() noexcept; - /** * Look up a field by name on an object (order-sensitive). * @@ -20958,10 +24087,92 @@ class object { /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceeding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_really_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containg + * the JSON document. + */ + simdjson_really_inline simdjson_result raw_json() noexcept; + protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_really_inline error_code consume() noexcept; static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; - static simdjson_really_inline object started(value_iterator &iter) noexcept; + static simdjson_really_inline simdjson_result started(value_iterator &iter) noexcept; static simdjson_really_inline object resume(const value_iterator &iter) noexcept; simdjson_really_inline object(const value_iterator &iter) noexcept; @@ -20995,6 +24206,11 @@ struct simdjson_result : publ simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + }; } // namespace simdjson @@ -21009,6 +24225,23 @@ class array; class object; class value; class raw_json_string; +class document_stream; + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; /** * A JSON fragment iterator. @@ -21022,11 +24255,12 @@ class parser { * * The new parser will have zero capacity. */ - inline parser() noexcept = default; + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; inline parser(parser &&other) noexcept = default; simdjson_really_inline parser(const parser &other) = delete; simdjson_really_inline parser &operator=(const parser &other) = delete; + simdjson_really_inline parser &operator=(parser &&other) noexcept = default; /** Deallocate the JSON parser. */ inline ~parser() noexcept = default; @@ -21037,6 +24271,11 @@ class parser { * ondemand::parser parser; * document doc = parser.iterate(json); * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * * ### IMPORTANT: Buffer Lifetime * * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as @@ -21101,14 +24340,15 @@ class parser { * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before * you call parse() again or destroy the parser. * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what * those bytes are initialized to, as long as they are allocated. * * @param json The JSON to parse. - * @param len The length of the JSON. - * @param allocated The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). * * @return The iterator, or an error: * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. @@ -21121,38 +24361,126 @@ class parser { */ simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; - /** The capacity of this parser (the largest document it can process). */ - simdjson_really_inline size_t capacity() const noexcept; - /** The maximum depth of this parser (the most deeply nested objects and arrays it can process). */ - simdjson_really_inline size_t max_depth() const noexcept; - -private: - /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr implementation{}; - size_t _capacity{0}; - size_t _max_depth{DEFAULT_MAX_DEPTH}; - std::unique_ptr string_buf{}; -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - std::unique_ptr start_positions{}; -#endif /** - * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length - * and `max_depth` depth. + * Parse a buffer containing many JSON documents. * - * @param capacity The new capacity. - * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. - * @return The error, if there is one. - */ - simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; - - friend class json_iterator; -}; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsabile to ensure that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with whitespace. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excesively small values may impact negatively the + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 10MB, which has been a reasonable sweet spot in our tests. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_really_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_really_inline size_t max_capacity() const noexcept; + simdjson_really_inline void set_max_capacity(size_t max_capacity) noexcept; + /** The maximum depth of this parser (the most deeply nested objects and arrays it can process). */ + simdjson_really_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + namespace simdjson { template<> @@ -21165,518 +24493,606 @@ struct simdjson_result : publ } // namespace simdjson /* end file include/simdjson/generic/ondemand/parser.h */ -/* end file include/simdjson/generic/ondemand.h */ +/* begin file include/simdjson/generic/ondemand/document_stream.h */ +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif -// Inline definitions -/* begin file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { -// -// internal::implementation_simdjson_result_base inline implementation -// +class parser; +class json_iterator; +class document; -template -simdjson_really_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } -} +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); -template -simdjson_warn_unused simdjson_really_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; -} +private: -template -simdjson_really_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; -} + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); -#if SIMDJSON_EXCEPTIONS + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; -template -simdjson_really_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; -} + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; -template -simdjson_really_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); -} + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED -template -simdjson_really_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); -} +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_really_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_really_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_really_inline document_stream &operator=(document_stream &&other) noexcept = default; -template -simdjson_really_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); -} + simdjson_really_inline ~document_stream() noexcept; -template -simdjson_really_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; -} + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; -template -simdjson_really_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); -} + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; -#endif // SIMDJSON_EXCEPTIONS + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; -template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + using difference_type = std::ptrdiff_t; -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ -/* begin file include/simdjson/generic/ondemand-inl.h */ -/* begin file include/simdjson/generic/ondemand/json_type-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { + using iterator_category = std::input_iterator_tag; -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; -} + /** + * Default constructor. + */ + simdjson_really_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_really_inline simdjson_result operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_really_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_really_inline size_t current_index() const noexcept; -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); -} -#endif + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_really_inline std::string_view source() const noexcept; -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; -namespace simdjson { + private: + simdjson_really_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} + friend class document; + friend class document_stream; + friend class json_iterator; + }; -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/json_type-inl.h */ -/* begin file include/simdjson/generic/ondemand/logger-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { -namespace logger { + /** + * Start iterating the documents in the stream. + */ + simdjson_really_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_really_inline iterator end() noexcept; -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. +private: -// Helper to turn unprintable or newline characters into spaces -static simdjson_really_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying -simdjson_really_inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta); -} + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. + * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_really_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size + ) noexcept; -simdjson_really_inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail); -} -simdjson_really_inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta); -} + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; -simdjson_really_inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail); - if (LOG_ENABLED) { log_depth++; } -} -simdjson_really_inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta); - if (LOG_ENABLED) { log_depth++; } -} + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; -simdjson_really_inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta); -} + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; -simdjson_really_inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta); -} -simdjson_really_inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail); -} + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; -simdjson_really_inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); -} + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; -simdjson_really_inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); -} + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; -simdjson_really_inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); -} + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; -simdjson_really_inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); -} + inline void load_from_stage1_thread() noexcept; -simdjson_really_inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); -} + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; -simdjson_really_inline void log_headers() noexcept { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; - printf("|%.*s", LOG_EVENT_LEN+2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN+2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN+2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5+2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); - } -} + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. + */ + ondemand::parser stage1_thread_parser{}; -simdjson_really_inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, iter.token.index+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail); -} -simdjson_really_inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept { - if (LOG_ENABLED) { - const int indent = depth*2; - const auto buf = iter.token.buf; - printf("| %*s%s%-*s ", - indent, "", - title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), title - ); - { - // Print the current structural. - printf("| "); - auto current_structural = &buf[*index]; - for (int i=0;i; + friend struct internal::simdjson_result_base; +}; // document_stream -} // namespace logger } // namespace ondemand } // namespace SIMDJSON_BUILTIN_IMPLEMENTATION } // namespace simdjson -/* end file include/simdjson/generic/ondemand/logger-inl.h */ -/* begin file include/simdjson/generic/ondemand/raw_json_string-inl.h */ + namespace simdjson { +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; +}; -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/document_stream.h */ +/* begin file include/simdjson/generic/ondemand/serialization.h */ -simdjson_really_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson -simdjson_really_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } -simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(uint8_t *&dst) const noexcept { - uint8_t *end = stringparsing::parse_string(buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; - return result; -} -simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - return true; -} +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/* end file include/simdjson/generic/ondemand/serialization.h */ +/* end file include/simdjson/generic/ondemand.h */ -simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - return true; -} +// Inline definitions +/* begin file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +// +// internal::implementation_simdjson_result_base inline implementation +// -simdjson_really_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +template +simdjson_really_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } } -simdjson_really_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); - } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +template +simdjson_warn_unused simdjson_really_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; } -simdjson_really_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - if(r[pos] != '"') { return false; } - return true; +template +simdjson_really_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; } +#if SIMDJSON_EXCEPTIONS -simdjson_really_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +template +simdjson_really_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; } -simdjson_really_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - if(r[pos] != '"') { return false; } - return true; +template +simdjson_really_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); } -simdjson_unused simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); +template +simdjson_really_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); } -simdjson_unused simdjson_really_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; +template +simdjson_really_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); } -simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); -} +#endif // SIMDJSON_EXCEPTIONS -simdjson_unused simdjson_really_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); +template +simdjson_really_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; } +template +simdjson_really_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} -simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter) const noexcept { - return unescape(iter.string_buf_loc()); +template +simdjson_really_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); } +template +simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} -simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; - } -} - -} // namespace ondemand } // namespace SIMDJSON_BUILTIN_IMPLEMENTATION } // namespace simdjson - +/* end file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ +/* begin file include/simdjson/generic/ondemand-inl.h */ +/* begin file include/simdjson/generic/ondemand/json_type-inl.h */ namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -simdjson_really_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; } -simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(uint8_t *&dst) const noexcept { - if (error()) { return error(); } - return first.unescape(dst); + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; } -simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape(iter); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); } +#endif -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/raw_json_string-inl.h */ -/* begin file include/simdjson/generic/ondemand/token_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { -simdjson_really_inline token_iterator::token_iterator(const uint8_t *_buf, token_position _index) noexcept - : buf{_buf}, index{_index} -{ -} -simdjson_really_inline const uint8_t *token_iterator::advance() noexcept { - return &buf[*(index++)]; +simdjson_really_inline number_type number::get_number_type() const noexcept { + return type; } -simdjson_really_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; +simdjson_really_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; } -simdjson_really_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; + +simdjson_really_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; } -simdjson_really_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; + +simdjson_really_inline number::operator uint64_t() const noexcept { + return get_uint64(); } -simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(index+delta)]; + +simdjson_really_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; } -simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(index+delta); + +simdjson_really_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; } -simdjson_really_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(index+delta+1) - *(index+delta); + +simdjson_really_inline number::operator int64_t() const noexcept { + return get_int64(); } -simdjson_really_inline token_position token_iterator::position() const noexcept { - return index; +simdjson_really_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; } -simdjson_really_inline void token_iterator::set_position(token_position target_checkpoint) noexcept { - index = target_checkpoint; + +simdjson_really_inline double number::get_double() const noexcept { + return payload.floating_point_number; } -simdjson_really_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return index == other.index; +simdjson_really_inline number::operator double() const noexcept { + return get_double(); } -simdjson_really_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return index != other.index; + +simdjson_really_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); } -simdjson_really_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return index > other.index; + +simdjson_really_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; } -simdjson_really_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return index >= other.index; + +simdjson_really_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; } -simdjson_really_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return index < other.index; + +simdjson_really_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; } -simdjson_really_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return index <= other.index; + +simdjson_really_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; } } // namespace ondemand @@ -21685,933 +25101,3068 @@ simdjson_really_inline bool token_iterator::operator<=(const token_iterator &oth namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -/* end file include/simdjson/generic/ondemand/token_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/json_iterator-inl.h */ +/* end file include/simdjson/generic/ondemand/json_type-inl.h */ +/* begin file include/simdjson/generic/ondemand/logger-inl.h */ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { +namespace logger { -simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - _depth{other._depth} -{ - other.parser = nullptr; -} -simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - _depth = other._depth; - other.parser = nullptr; - return *this; -} - -simdjson_really_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, _parser->implementation->structural_indexes.get()), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1} -{ - logger::log_headers(); -} - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - - switch (*advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. - logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - } +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. - // Now that we've considered the first value, we only increment/decrement for arrays/objects - auto end = &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; - while (token.index <= end) { - switch (*advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; } - - return report_error(TAPE_ERROR, "not enough close braces"); -} - -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_really_inline bool json_iterator::at_root() const noexcept { - return token.position() == root_checkpoint(); } -simdjson_really_inline token_position json_iterator::root_checkpoint() const noexcept { - return parser->implementation->structural_indexes.get(); +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta); } -simdjson_really_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); - // Visual Studio Clang treats unique_ptr.get() as "side effecting." -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( token.index == parser->implementation->structural_indexes.get() ); -#endif +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail); } - -simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return token.index == &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta); } -simdjson_really_inline bool json_iterator::is_alive() const noexcept { - return parser; +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail); + if (LOG_ENABLED) { log_depth++; } } - -simdjson_really_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta); + if (LOG_ENABLED) { log_depth++; } } -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { - return token.advance(); +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta); } -simdjson_really_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { - return token.peek(delta); +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta); } - -simdjson_really_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { - return token.peek_length(delta); +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail); } -simdjson_really_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - return token.peek(position); +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_really_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { - return token.peek_length(position); +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_really_inline token_position json_iterator::last_document_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; -} -simdjson_really_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_document_position()); +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_really_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } -simdjson_really_inline depth_t json_iterator::depth() const noexcept { - return _depth; -} - -simdjson_really_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; -} +inline void log_headers() noexcept { + if (LOG_ENABLED) { + // Technically a static variable is not thread-safe, but if you are using threads + // and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if(!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf("# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The identation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); -simdjson_really_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; + printf("|%.*s", LOG_EVENT_LEN+2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN+2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN+2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5+2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } } -simdjson_really_inline token_position json_iterator::position() const noexcept { - return token.position(); +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail); } -simdjson_really_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept { + if (LOG_ENABLED) { + const int indent = depth*2; + const auto buf = iter.token.buf; + printf("| %*s%s%-*s ", + indent, "", + title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), title + ); + { + // Print the current structural. + printf("| "); + auto current_structural = &buf[*index]; + for (int i=0;istart_positions[depth]; -} -simdjson_really_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - parser->start_positions[depth] = position; -} +} // namespace logger +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/logger-inl.h */ +/* begin file include/simdjson/generic/ondemand/raw_json_string-inl.h */ +namespace simdjson { -#endif +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { +simdjson_really_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_really_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; +simdjson_really_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(uint8_t *&dst) const noexcept { + uint8_t *end = stringparsing::parse_string(buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -template -simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept { - // Truncate whitespace to fit the buffer. - if (max_len > N-1) { - if (jsoncharutils::is_not_structural_or_whitespace(json[N-1])) { return false; } - max_len = N-1; +simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } } - - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - tmpbuf[max_len] = ' '; return true; } -template -simdjson_warn_unused simdjson_really_inline bool json_iterator::peek_to_buffer(uint8_t (&tmpbuf)[N]) noexcept { - auto max_len = token.peek_length(); - auto json = token.peek(); - return copy_to_buffer(json, max_len, tmpbuf); -} - -template -simdjson_warn_unused simdjson_really_inline bool json_iterator::advance_to_buffer(uint8_t (&tmpbuf)[N]) noexcept { - auto max_len = peek_length(); - auto json = advance(); - return copy_to_buffer(json, max_len, tmpbuf); +simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; } -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/json_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/value_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { -simdjson_really_inline value_iterator::value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept - : _json_iter{json_iter}, - _depth{depth}, - _start_position{start_index} -{ +simdjson_really_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_object() noexcept { - const uint8_t *json; - SIMDJSON_TRY( advance_container_start("object", json) ); - if (*json != '{') { return incorrect_type_error("Not an object"); } - return started_object(); +simdjson_really_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_object() noexcept { - bool result; - SIMDJSON_TRY( start_object().get(result) ); - if (*_json_iter->peek_last() != '}') { return _json_iter->report_error(TAPE_ERROR, "object invalid: { at beginning of document unmatched by } at end of document"); } - return result; +simdjson_really_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -simdjson_warn_unused simdjson_really_inline bool value_iterator::started_object() noexcept { - assert_at_container_start(); - if (*_json_iter->peek() == '}') { - logger::log_value(*_json_iter, "empty object"); - _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return false; + +simdjson_really_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } } - logger::log_start_value(*_json_iter, "object"); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, _start_position); -#endif + if(r[pos] != '"') { return false; } return true; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_field() noexcept { - assert_at_next(); - - switch (*_json_iter->advance()) { - case '}': - logger::log_end_value(*_json_iter, "object"); - _json_iter->ascend_to(depth()-1); +simdjson_really_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. return false; - case ',': - return true; - default: - return _json_iter->report_error(TAPE_ERROR, "Missing comma between object fields"); + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } } + if(r[pos] != '"') { return false; } + return true; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { - error_code error; - bool has_value; - // - // Initially, the object can be in one of a few different places: - // - // 1. The start of the object, at the first field: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - // - if (at_first_field()) { - has_value = true; +simdjson_unused simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} - // - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - has_value = false; +simdjson_unused simdjson_really_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - if ((error = skip_child() )) { abandon(); return error; } - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != _start_position) { return OUT_OF_ORDER_ITERATION; } -#endif - } - while (has_value) { - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; +simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} - if ((error = field_value() )) { abandon(); return error; } - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - //if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - return true; - } +simdjson_unused simdjson_really_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - SIMDJSON_TRY( skip_child() ); // Skip the value entirely - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + +simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter) const noexcept { + return unescape(iter.string_buf_loc()); +} + + +simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; } +} - // If the loop ended, we're out of fields to look at. - return false; +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_really_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(uint8_t *&dst) const noexcept { + if (error()) { return error(); } + return first.unescape(dst); +} +simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape(iter); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { - error_code error; - bool has_value; - // - // Initially, the object can be in one of a few different places: - // - // 1. The start of the object, at the first field: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - // - if (at_first_field()) { - // If we're at the beginning of the object, we definitely have a field - has_value = true; +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/raw_json_string-inl.h */ +/* begin file include/simdjson/generic/ondemand/token_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - has_value = false; +simdjson_really_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - // Finish the previous value and see if , or } is next - if ((error = skip_child() )) { abandon(); return error; } - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != _start_position) { return OUT_OF_ORDER_ITERATION; } -#endif - } +simdjson_really_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} - // After initial processing, we will be in one of two states: - // - // ``` - // // At the beginning of a field - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // At the end of the object - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // ``` - // - // First, we scan from that point to the end. - // If we don't find a match, we loop back around, and scan from the beginning to that point. - token_position search_start = _json_iter->position(); +simdjson_really_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} - // Next, we find a match starting from the current position. - while (has_value) { - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_really_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_really_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_really_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes +simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_really_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - if ((error = field_value() )) { abandon(); return error; } +simdjson_really_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_really_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - return true; - } +simdjson_really_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_really_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_really_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_really_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_really_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_really_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/token_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/json_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_really_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + /*** + * WARNING: + * Inside an object, a string value is a depth of +1 compared to the object. Yet a key + * is at the same depth as the object. + * But json_iterator cannot easily tell whether we are pointing at a key or a string value. + * Instead, it assumes that if you are pointing at a string, then it is a value, not a key. + * To be clear... + * the following code assumes that we are *not* pointing at a key. If we are then a bug + * will follow. Unfortunately, it is not possible for the json_iterator its to make this + * check. + */ + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + /*case '"': + if(*peek() == ':') { + // we are at a key!!! This is + // only possible if someone searched + // for a key in an object and the key + // was not found but our code then + // decided the consume the separating + // comma before returning. + logger::log_value(*this, "key"); + advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough;*/ + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_really_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_really_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_really_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_really_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_really_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_really_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_really_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_really_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_really_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_really_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_really_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} + +simdjson_really_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_really_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(0); +} + +simdjson_really_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_really_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_really_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_really_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} + +simdjson_really_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_really_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_really_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_really_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_really_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_really_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_really_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_really_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_really_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + return parser->start_positions[depth]; +} + +simdjson_really_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + parser->start_positions[depth] = position; +} + +#endif + + +simdjson_really_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + +template +simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept { + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Truncate whitespace to fit the buffer. + if (max_len > N-1) { + // if (jsoncharutils::is_not_structural_or_whitespace(json[N-1])) { return false; } + max_len = N-1; + } + + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + tmpbuf[max_len] = ' '; + return true; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/json_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/value_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if (! _json_iter->streaming() && (*_json_iter->peek_last() != '}')) { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + return started_object(); +} + +simdjson_warn_unused simdjson_really_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. + return false; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_really_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() && (*_json_iter->peek_last() != ']')) { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + return started_array(); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_really_inline bool value_iterator::parse_null(const uint8_t *json) const noexcept { + return !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_string() noexcept { + return get_raw_json_string().unescape(_json_iter->string_buf_loc()); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_really_inline bool value_iterator::is_null() noexcept { + auto result = parse_null(peek_non_root_scalar("null")); + if(result) { advance_non_root_scalar("null"); } + return result; +} +simdjson_really_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_really_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_really_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_really_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_really_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_really_inline simdjson_result value_iterator::is_root_integer() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + return numberparsing::is_integer(tmpbuf); +} + +simdjson_really_inline simdjson_result value_iterator::get_root_number_type() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + return numberparsing::get_number_type(tmpbuf); +} +simdjson_really_inline simdjson_result value_iterator::get_root_number() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + advance_root_scalar("number"); + return num; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_string() noexcept { + return get_string(); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_raw_json_string() noexcept { + return get_raw_json_string(); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64_in_string() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64_in_string() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("double"); } + return result; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double_in_string() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_bool() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("bool"); } + return result; +} +simdjson_really_inline bool value_iterator::is_root_null() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(result) { advance_root_scalar("null"); } + return result; +} + +simdjson_warn_unused simdjson_really_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_really_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_really_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_really_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_really_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_really_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_really_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_really_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_really_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_really_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_really_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_really_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_really_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_really_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} + +simdjson_really_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_really_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_really_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_really_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_really_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_really_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_really_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_really_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_really_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_really_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_really_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_really_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_really_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_really_inline simdjson_result value_iterator::reset_array() noexcept { + move_at_container_start(); + return started_array(); +} + +simdjson_really_inline simdjson_result value_iterator::reset_object() noexcept { + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_really_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_really_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_really_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_really_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_really_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_really_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_really_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/value_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/array_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_really_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_really_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_really_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_really_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value +) noexcept + : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/array_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/object_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +// +// object_iterator +// + +simdjson_really_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_really_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_really_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_really_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +simdjson_really_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/object_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/array-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_really_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_really_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_really_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_really_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_really_inline simdjson_result array::begin() noexcept { +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_really_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_really_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_really_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_really_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} + +simdjson_really_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +simdjson_really_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_really_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_really_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_really_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/array-inl.h */ +/* begin file include/simdjson/generic/ondemand/document-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_really_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() noexcept { + return iter.current_location(); +} + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_really_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_really_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_really_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_really_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + iter.assert_at_document_depth(); + switch (*iter.peek()) { + case '[': + case '{': + return value(get_root_value_iterator()); + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + // return value(get_root_value_iterator()); + } +} +simdjson_really_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_really_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} +simdjson_really_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(); +} +simdjson_really_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(); +} +simdjson_really_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(); +} +simdjson_really_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(); +} +simdjson_really_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(); +} +simdjson_really_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(); +} +simdjson_really_inline simdjson_result document::get_string() noexcept { + return get_root_value_iterator().get_root_string(); +} +simdjson_really_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(); +} +simdjson_really_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(); +} +simdjson_really_inline bool document::is_null() noexcept { + return get_root_value_iterator().is_root_null(); +} + +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_string(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_string(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_really_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_really_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_really_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_really_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_really_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_really_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_really_inline document::operator std::string_view() noexcept(false) { return get_string(); } +simdjson_really_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_really_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_really_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_really_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { + iter._depth = 1 ; /* undoing the increment so we go back at the doc depth.*/ + iter.assert_at_document_depth(); + } + return answer; +} +simdjson_really_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { + iter._depth = 1 ; /* undoing the increment so we go back at the doc depth.*/ + iter.assert_at_document_depth(); + } + return answer; +} +simdjson_really_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_really_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_really_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_really_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_really_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_really_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_really_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_really_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_really_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_really_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_really_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_really_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_really_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_really_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_really_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(); +} + +simdjson_really_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(); +} + +simdjson_really_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(); +} + + +simdjson_really_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +} + +simdjson_really_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_really_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_really_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_really_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_really_inline bool simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_really_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_really_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_really_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - SIMDJSON_TRY( skip_child() ); - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } - } +template<> simdjson_really_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) & noexcept = delete; +template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} - // If we reach the end without finding a match, search the rest of the fields starting at the - // beginning of the object. - // (We have already run through the object before, so we've already validated its structure. We - // don't check errors in this bit.) - _json_iter->reenter_child(_start_position + 1, _depth); +simdjson_really_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} - has_value = started_object(); - while (_json_iter->position() < search_start) { - SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); - error = field_value(); SIMDJSON_ASSUME(!error); +simdjson_really_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - return true; - } +simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - SIMDJSON_TRY( skip_child() ); - error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); - } +simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} - // If the loop ended, we're out of fields to look at. - return false; +simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::field_key() noexcept { - assert_at_next(); - const uint8_t *key = _json_iter->advance(); - if (*(key++) != '"') { return _json_iter->report_error(TAPE_ERROR, "Object key is not a string"); } - return raw_json_string(key); +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif -simdjson_warn_unused simdjson_really_inline error_code value_iterator::field_value() noexcept { - assert_at_next(); - if (*_json_iter->advance() != ':') { return _json_iter->report_error(TAPE_ERROR, "Missing colon in object field"); } - _json_iter->descend_to(depth()+1); - return SUCCESS; +simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_array() noexcept { - const uint8_t *json; - SIMDJSON_TRY( advance_container_start("array", json) ); - if (*json != '[') { return incorrect_type_error("Not an array"); } - return started_array(); +simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_array() noexcept { - bool result; - SIMDJSON_TRY( start_array().get(result) ); - if (*_json_iter->peek_last() != ']') { return _json_iter->report_error(TAPE_ERROR, "array invalid: [ at beginning of document unmatched by ] at end of document"); } - return result; +simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_warn_unused simdjson_really_inline bool value_iterator::started_array() noexcept { - assert_at_container_start(); - if (*_json_iter->peek() == ']') { - logger::log_value(*_json_iter, "empty array"); - _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return false; - } - logger::log_start_value(*_json_iter, "array"); - _json_iter->descend_to(depth()+1); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, _start_position); + +} // namespace simdjson + + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_really_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_really_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_really_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_really_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +simdjson_really_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_uint64(); } +simdjson_really_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_int64(); } +simdjson_really_inline simdjson_result document_reference::get_double() noexcept { return doc->get_double(); } +simdjson_really_inline simdjson_result document_reference::get_string() noexcept { return doc->get_string(); } +simdjson_really_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_raw_json_string(); } +simdjson_really_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_bool(); } +simdjson_really_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_really_inline bool document_reference::is_null() noexcept { return doc->is_null(); } + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_really_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_really_inline document_reference::operator uint64_t() noexcept(false) { return uint64_t(*doc); } +simdjson_really_inline document_reference::operator int64_t() noexcept(false) { return int64_t(*doc); } +simdjson_really_inline document_reference::operator double() noexcept(false) { return double(*doc); } +simdjson_really_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_really_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_really_inline document_reference::operator bool() noexcept(false) { return bool(*doc); } +simdjson_really_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif - return true; -} +simdjson_really_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_really_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_really_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_really_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_really_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_really_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_really_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_really_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_really_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_really_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_really_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_really_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_really_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_really_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); }; +simdjson_really_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_really_inline simdjson_result document_reference::is_integer() noexcept { return doc->is_integer(); } +simdjson_really_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_number_type(); } +simdjson_really_inline simdjson_result document_reference::get_number() noexcept { return doc->get_number(); } +simdjson_really_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_really_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_really_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_really_inline document_reference::operator document&() const noexcept { return *doc; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_element() noexcept { - assert_at_next(); +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson - switch (*_json_iter->advance()) { - case ']': - logger::log_end_value(*_json_iter, "array"); - _json_iter->ascend_to(depth()-1); - return false; - case ',': - _json_iter->descend_to(depth()+1); - return true; - default: - return _json_iter->report_error(TAPE_ERROR, "Missing comma between array elements"); - } -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { - auto not_true = atomparsing::str4ncmp(json, "true"); - auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); - bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); - if (error) { return incorrect_type_error("Not a boolean"); } - return simdjson_result(!not_true); + +namespace simdjson { +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); } -simdjson_really_inline bool value_iterator::parse_null(const uint8_t *json) const noexcept { - return !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); +simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); } - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_string() noexcept { - return get_raw_json_string().unescape(_json_iter->string_buf_loc()); +simdjson_really_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = advance_start("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - return raw_json_string(json+1); +simdjson_really_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64() noexcept { - return numberparsing::parse_unsigned(advance_non_root_scalar("uint64")); +simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64() noexcept { - return numberparsing::parse_integer(advance_non_root_scalar("int64")); +simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { + return {}; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double() noexcept { - return numberparsing::parse_double(advance_non_root_scalar("double")); +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_bool() noexcept { - return parse_bool(advance_non_root_scalar("bool")); +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); } -simdjson_really_inline bool value_iterator::is_null() noexcept { - return parse_null(advance_non_root_scalar("null")); +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; } - -constexpr const uint32_t MAX_INT_LENGTH = 1024; - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_string() noexcept { - return get_string(); +simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_really_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_really_inline bool simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_really_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_really_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_raw_json_string() noexcept { - return get_raw_json_string(); +simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64() noexcept { - auto max_len = peek_start_length(); - auto json = advance_root_scalar("uint64"); - uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, _start_position, depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } - return numberparsing::parse_unsigned(tmpbuf); +simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64() noexcept { - auto max_len = peek_start_length(); - auto json = advance_root_scalar("int64"); - uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, _start_position, depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } - return numberparsing::parse_integer(tmpbuf); +simdjson_really_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double() noexcept { - auto max_len = peek_start_length(); - auto json = advance_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest number: -0.e-308. - uint8_t tmpbuf[1074+8+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, _start_position, depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } - return numberparsing::parse_double(tmpbuf); +simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_bool() noexcept { - auto max_len = peek_start_length(); - auto json = advance_root_scalar("bool"); - uint8_t tmpbuf[5+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error("Not a boolean"); } - return parse_bool(tmpbuf); +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_really_inline bool value_iterator::is_root_null() noexcept { - auto max_len = peek_start_length(); - auto json = advance_root_scalar("null"); - return max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[5])); +simdjson_really_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_warn_unused simdjson_really_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); - - return _json_iter->skip_child(depth()); +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -simdjson_really_inline value_iterator value_iterator::child() const noexcept { - assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.position() }; +simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is -// marked non-inline. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_really_inline bool value_iterator::is_open() const noexcept { - return _json_iter->depth() >= depth(); +simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_really_inline bool value_iterator::at_eof() const noexcept { - return _json_iter->at_eof(); +simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_really_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.index == _start_position; -} -simdjson_really_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); - return _json_iter->token.index == _start_position + 1; +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/document-inl.h */ +/* begin file include/simdjson/generic/ondemand/value-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_really_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_really_inline value value::resume(const value_iterator &iter) noexcept { + return iter; } -simdjson_really_inline void value_iterator::abandon() noexcept { - _json_iter->abandon(); +simdjson_really_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_really_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_really_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } } -simdjson_warn_unused simdjson_really_inline depth_t value_iterator::depth() const noexcept { - return _depth; +simdjson_really_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); } -simdjson_warn_unused simdjson_really_inline error_code value_iterator::error() const noexcept { - return _json_iter->error; +simdjson_really_inline simdjson_result value::get_string() noexcept { + return iter.get_string(); } -simdjson_warn_unused simdjson_really_inline uint8_t *&value_iterator::string_buf_loc() noexcept { - return _json_iter->string_buf_loc(); +simdjson_really_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); } -simdjson_warn_unused simdjson_really_inline const json_iterator &value_iterator::json_iter() const noexcept { - return *_json_iter; +simdjson_really_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); } -simdjson_warn_unused simdjson_really_inline json_iterator &value_iterator::json_iter() noexcept { - return *_json_iter; +simdjson_really_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); } - -simdjson_really_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(_start_position); +simdjson_really_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); } -simdjson_really_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(_start_position); +simdjson_really_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); } - -simdjson_really_inline const uint8_t *value_iterator::advance_start(const char *type) const noexcept { - logger::log_value(*_json_iter, _start_position, depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return peek_start(); } - - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - auto result = _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return result; +simdjson_really_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_really_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_really_inline bool value::is_null() noexcept { + return iter.is_null(); } -simdjson_really_inline error_code value_iterator::advance_container_start(const char *type, const uint8_t *&json) const noexcept { - logger::log_start_value(*_json_iter, _start_position, depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - json = peek_start(); - return SUCCESS; - } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_string(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_bool(); } - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - json = _json_iter->advance(); - return SUCCESS; +template simdjson_really_inline error_code value::get(T &out) noexcept { + return get().get(out); } -simdjson_really_inline const uint8_t *value_iterator::advance_root_scalar(const char *type) const noexcept { - logger::log_value(*_json_iter, _start_position, depth(), type); - if (!is_at_start()) { return peek_start(); } - assert_at_root(); - auto result = _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return result; +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline value::operator array() noexcept(false) { + return get_array(); } -simdjson_really_inline const uint8_t *value_iterator::advance_non_root_scalar(const char *type) const noexcept { - logger::log_value(*_json_iter, _start_position, depth(), type); - if (!is_at_start()) { return peek_start(); } - - assert_at_non_root_start(); - auto result = _json_iter->advance(); - _json_iter->ascend_to(depth()-1); - return result; +simdjson_really_inline value::operator object() noexcept(false) { + return get_object(); } - -simdjson_really_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, _start_position, depth(), message); - return INCORRECT_TYPE; +simdjson_really_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); } - -simdjson_really_inline bool value_iterator::is_at_start() const noexcept { - return _json_iter->token.index == _start_position; +simdjson_really_inline value::operator int64_t() noexcept(false) { + return get_int64(); } -simdjson_really_inline bool value_iterator::is_at_container_start() const noexcept { - return _json_iter->token.index == _start_position + 1; +simdjson_really_inline value::operator double() noexcept(false) { + return get_double(); } -simdjson_really_inline bool value_iterator::is_at_iterator_start() const noexcept { - // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = _json_iter->token.index - _start_position; - return delta == 1 || delta == 2; +simdjson_really_inline value::operator std::string_view() noexcept(false) { + return get_string(); } - -simdjson_really_inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index == _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); +simdjson_really_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); } - -simdjson_really_inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index == _start_position + 1 ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); +simdjson_really_inline value::operator bool() noexcept(false) { + return get_bool(); } +#endif -simdjson_really_inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); +simdjson_really_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); } - -simdjson_really_inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); +simdjson_really_inline simdjson_result value::end() & noexcept { + return {}; } - -simdjson_really_inline void value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); +simdjson_really_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; } - -simdjson_really_inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); +simdjson_really_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); } -simdjson_really_inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); +simdjson_really_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); } - -simdjson_really_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; +simdjson_really_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); } - -simdjson_really_inline simdjson_result value_iterator::type() noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; - } +simdjson_really_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_really_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); } -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_really_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_really_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/value_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/array_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { +simdjson_really_inline simdjson_result value::type() noexcept { + return iter.type(); +} -simdjson_really_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +simdjson_really_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} -simdjson_really_inline simdjson_result array_iterator::operator*() noexcept { - if (iter.error()) { iter.abandon(); return iter.error(); } - return value(iter.child()); +simdjson_really_inline bool value::is_negative() noexcept { + return iter.is_negative(); } -simdjson_really_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { - return !(*this != other); + +simdjson_really_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); } -simdjson_really_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { - return iter.is_open(); +simdjson_warn_unused simdjson_really_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); } -simdjson_really_inline array_iterator &array_iterator::operator++() noexcept { - error_code error; - // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. - // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. - if ((error = iter.error()) ) { return *this; } - if ((error = iter.skip_child() )) { return *this; } - if ((error = iter.has_next_element().error() )) { return *this; } - return *this; +simdjson_warn_unused simdjson_really_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_really_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_really_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } } } // namespace ondemand @@ -22620,237 +28171,245 @@ simdjson_really_inline array_iterator &array_iterator::operator++() noexcept { namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value -) noexcept - : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base(std::forward(value)) +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) { - first.iter.assert_is_valid(); } -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base({}, error) +simdjson_really_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) { } - -simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } - return *first; + return first.count_elements(); } -simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; +simdjson_really_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); } -simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; +simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); } -simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++(first); - return *this; +simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; } -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/array_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/object_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { +simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} -// -// object_iterator -// +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} -simdjson_really_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} -simdjson_really_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; +simdjson_really_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); } -simdjson_really_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); +simdjson_really_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); } -simdjson_really_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); +simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); } - -simdjson_really_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error - - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } - - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; +simdjson_really_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_really_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_really_inline bool simdjson_result::is_null() noexcept { + if (error()) { return false; } + return first.is_null(); } -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. -// - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson +template simdjson_really_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_really_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} -namespace simdjson { +template<> simdjson_really_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); +simdjson_really_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_really_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ +simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; +simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -// If we're iterating and there is an error, return the error once. -simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; +simdjson_really_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -// If we're iterating and there is an error, return the error once. -simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; +simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -// Checks for ']' and ',' -simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } } // namespace simdjson -/* end file include/simdjson/generic/ondemand/object_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/array-inl.h */ +/* end file include/simdjson/generic/ondemand/value-inl.h */ +/* begin file include/simdjson/generic/ondemand/field-inl.h */ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -// -// ### Live States -// -// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the array is first found and the iterator is just past the `{`. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, -// depth == iter->depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter->depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the array iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an -// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter->depth == depth, and at_start == false. -// -// ## Terminal State -// -// The terminal state has iter->depth < depth. at_start is always false. -// -// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this -// by decrementing depth. In this state, iter->depth < depth, at_start == false, and -// error == SUCCESS. -// +// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +simdjson_really_inline field::field() noexcept : std::pair() {} -simdjson_really_inline array::array(const value_iterator &_iter) noexcept - : iter{_iter} +simdjson_really_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) { } -simdjson_really_inline simdjson_result array::start(value_iterator &iter) noexcept { - // We don't need to know if the array is empty to start iteration, but we do want to know if there - // is an error--thus `simdjson_unused`. - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_array().get(has_value) ); - return array(iter); +simdjson_really_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); } -simdjson_really_inline simdjson_result array::start_root(value_iterator &iter) noexcept { - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_root_array().get(has_value) ); - return array(iter); + +simdjson_really_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); } -simdjson_really_inline array array::started(value_iterator &iter) noexcept { - simdjson_unused bool has_value = iter.started_array(); - return array(iter); + +simdjson_really_inline simdjson_warn_unused simdjson_result field::unescaped_key() noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.string_buf_loc()); + first.consume(); + return answer; } -simdjson_really_inline simdjson_result array::begin() noexcept { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return array_iterator(iter); +simdjson_really_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; } -simdjson_really_inline simdjson_result array::end() noexcept { - return array_iterator(iter); + +simdjson_really_inline value &field::value() & noexcept { + return second; +} + +simdjson_really_inline value field::value() && noexcept { + return std::forward(*this).second; } } // namespace ondemand @@ -22859,153 +28418,199 @@ simdjson_really_inline simdjson_result array::end() noexcept { namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value -) noexcept - : implementation_simdjson_result_base( - std::forward(value) +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_really_inline simdjson_result::simdjson_result( +simdjson_really_inline simdjson_result::simdjson_result( error_code error -) noexcept - : implementation_simdjson_result_base(error) +) noexcept : + implementation_simdjson_result_base(error) { } -simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_really_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } - return first.begin(); + return first.key(); } -simdjson_really_inline simdjson_result simdjson_result::end() noexcept { +simdjson_really_inline simdjson_result simdjson_result::unescaped_key() noexcept { if (error()) { return error(); } - return first.end(); + return first.unescaped_key(); +} +simdjson_really_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); } } // namespace simdjson -/* end file include/simdjson/generic/ondemand/array-inl.h */ -/* begin file include/simdjson/generic/ondemand/document-inl.h */ +/* end file include/simdjson/generic/ondemand/field-inl.h */ +/* begin file include/simdjson/generic/ondemand/object-inl.h */ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline document::document(ondemand::json_iterator &&_iter) noexcept - : iter{std::forward(_iter)} -{ - logger::log_start_value(iter, "document"); -} - -simdjson_really_inline document document::start(json_iterator &&iter) noexcept { - return document(std::forward(iter)); -} - -simdjson_really_inline value_iterator document::resume_value_iterator() noexcept { - return value_iterator(&iter, 1, iter.root_checkpoint()); -} -simdjson_really_inline value_iterator document::get_root_value_iterator() noexcept { - return resume_value_iterator(); -} -simdjson_really_inline value document::resume_value() noexcept { - return resume_value_iterator(); -} -simdjson_really_inline simdjson_result document::get_array() & noexcept { - auto value = get_root_value_iterator(); - return array::start_root(value); -} -simdjson_really_inline simdjson_result document::get_object() & noexcept { - auto value = get_root_value_iterator(); - return object::start_root(value); -} -simdjson_really_inline simdjson_result document::get_uint64() noexcept { - return get_root_value_iterator().get_root_uint64(); -} -simdjson_really_inline simdjson_result document::get_int64() noexcept { - return get_root_value_iterator().get_root_int64(); +simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); } -simdjson_really_inline simdjson_result document::get_double() noexcept { - return get_root_value_iterator().get_root_double(); +simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); } -simdjson_really_inline simdjson_result document::get_string() noexcept { - return get_root_value_iterator().get_root_string(); +simdjson_really_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); } -simdjson_really_inline simdjson_result document::get_raw_json_string() noexcept { - return get_root_value_iterator().get_root_raw_json_string(); +simdjson_really_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); } -simdjson_really_inline simdjson_result document::get_bool() noexcept { - return get_root_value_iterator().get_root_bool(); +simdjson_really_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); } -simdjson_really_inline bool document::is_null() noexcept { - return get_root_value_iterator().is_root_null(); +simdjson_really_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_array(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_object(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_double(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_uint64(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_int64(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_bool(); } - -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } - -template simdjson_really_inline error_code document::get(T &out) & noexcept { - return get().get(out); +simdjson_really_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_really_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); } -template simdjson_really_inline error_code document::get(T &out) && noexcept { - return std::forward(*this).get().get(out); +simdjson_really_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; } -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline document::operator array() & noexcept(false) { return get_array(); } -simdjson_really_inline document::operator object() & noexcept(false) { return get_object(); } -simdjson_really_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_really_inline document::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_really_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_really_inline document::operator std::string_view() noexcept(false) { return get_string(); } -simdjson_really_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_really_inline document::operator bool() noexcept(false) { return get_bool(); } -#endif - -simdjson_really_inline simdjson_result document::begin() & noexcept { - return get_array().begin(); +simdjson_really_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek(0)}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_really_inline simdjson_result document::end() & noexcept { - return {}; + +simdjson_really_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } -simdjson_really_inline simdjson_result document::find_field(std::string_view key) & noexcept { - return resume_value().find_field(key); +simdjson_really_inline object object::resume(const value_iterator &iter) noexcept { + return iter; } -simdjson_really_inline simdjson_result document::find_field(const char *key) & noexcept { - return resume_value().find_field(key); + +simdjson_really_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ } -simdjson_really_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { - return resume_value().find_field_unordered(key); + +simdjson_really_inline simdjson_result object::begin() noexcept { +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); } -simdjson_really_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { - return resume_value().find_field_unordered(key); +simdjson_really_inline simdjson_result object::end() noexcept { + return object_iterator(iter); } -simdjson_really_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return resume_value()[key]; + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; } -simdjson_really_inline simdjson_result document::operator[](const char *key) & noexcept { - return resume_value()[key]; + +simdjson_really_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; } -simdjson_really_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); +simdjson_really_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; } -simdjson_really_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +simdjson_really_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); } } // namespace ondemand @@ -23014,759 +28619,828 @@ simdjson_really_inline simdjson_result document::raw_json_toke namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { - return {}; -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_really_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } - return first.find_field_unordered(key); + return first.end(); } -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } - return first[key]; + return std::forward(first).find_field_unordered(key); } -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } - return first.find_field(key); + return std::forward(first)[key]; } -simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { - if (error()) { return error(); } - return first.get_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_really_inline bool simdjson_result::is_null() noexcept { +simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } - return first.is_null(); + return std::forward(first).find_field(key); } -template -simdjson_really_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template -simdjson_really_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); -} -template -simdjson_really_inline error_code simdjson_result::get(T &out) && noexcept { +simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } - return std::forward(first).get(out); + return first.at_pointer(json_pointer); } -template<> simdjson_really_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); -} -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) & noexcept = delete; -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) && noexcept { +inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; + return first.reset(); } -simdjson_really_inline simdjson_result simdjson_result::type() noexcept { +inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } - return first.type(); -} - -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + return first.is_empty(); } -#endif -simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } - return first.raw_json_token(); + return first.count_fields(); } } // namespace simdjson -/* end file include/simdjson/generic/ondemand/document-inl.h */ -/* begin file include/simdjson/generic/ondemand/value-inl.h */ +/* end file include/simdjson/generic/ondemand/object-inl.h */ +/* begin file include/simdjson/generic/ondemand/parser-inl.h */ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline value::value(const value_iterator &_iter) noexcept - : iter{_iter} -{ -} -simdjson_really_inline value value::start(const value_iterator &iter) noexcept { - return iter; -} -simdjson_really_inline value value::resume(const value_iterator &iter) noexcept { - return iter; +simdjson_really_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { } -simdjson_really_inline simdjson_result value::get_array() noexcept { - return array::start(iter); -} -simdjson_really_inline simdjson_result value::get_object() noexcept { - return object::start(iter); -} -simdjson_really_inline simdjson_result value::start_or_resume_object() noexcept { - if (iter.at_start()) { - return get_object(); +simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); } else { - return object::resume(iter); + SIMDJSON_TRY( simdjson::active_implementation->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; } -simdjson_really_inline simdjson_result value::get_raw_json_string() noexcept { - return iter.get_raw_json_string(); +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_really_inline simdjson_result value::get_string() noexcept { - return iter.get_string(); + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_really_inline simdjson_result value::get_double() noexcept { - return iter.get_double(); + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_really_inline simdjson_result value::get_uint64() noexcept { - return iter.get_uint64(); + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); } -simdjson_really_inline simdjson_result value::get_int64() noexcept { - return iter.get_int64(); + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); } -simdjson_really_inline simdjson_result value::get_bool() noexcept { - return iter.get_bool(); + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); } -simdjson_really_inline bool value::is_null() noexcept { - return iter.is_null(); + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_array(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_object(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_double(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_uint64(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_int64(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_bool(); } +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } -template simdjson_really_inline error_code value::get(T &out) noexcept { - return get().get(out); -} + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline value::operator array() noexcept(false) { - return get_array(); + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } -simdjson_really_inline value::operator object() noexcept(false) { - return get_object(); + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, buf, len, batch_size); } -simdjson_really_inline value::operator uint64_t() noexcept(false) { - return get_uint64(); +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size); } -simdjson_really_inline value::operator int64_t() noexcept(false) { - return get_int64(); +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size) noexcept { + return iterate_many(s.data(), s.length(), batch_size); } -simdjson_really_inline value::operator double() noexcept(false) { - return get_double(); +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size) noexcept { + return iterate_many(s.data(), s.length(), batch_size); } -simdjson_really_inline value::operator std::string_view() noexcept(false) { - return get_string(); + +simdjson_really_inline size_t parser::capacity() const noexcept { + return _capacity; } -simdjson_really_inline value::operator raw_json_string() noexcept(false) { - return get_raw_json_string(); +simdjson_really_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; } -simdjson_really_inline value::operator bool() noexcept(false) { - return get_bool(); +simdjson_really_inline size_t parser::max_depth() const noexcept { + return _max_depth; } -#endif -simdjson_really_inline simdjson_result value::begin() & noexcept { - return get_array().begin(); -} -simdjson_really_inline simdjson_result value::end() & noexcept { - return {}; +simdjson_really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + size_t MINIMAL_DOCUMENT_CAPACITY = 32; + if(max_capacity < MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = MINIMAL_DOCUMENT_CAPACITY; + } } -simdjson_really_inline simdjson_result value::find_field(std::string_view key) noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_really_inline simdjson_result value::find_field(const char *key) noexcept { - return start_or_resume_object().find_field(key); -} +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson -simdjson_really_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_really_inline simdjson_result value::find_field_unordered(const char *key) noexcept { - return start_or_resume_object().find_field_unordered(key); +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/parser-inl.h */ +/* begin file include/simdjson/generic/ondemand/document_stream-inl.h */ +#include +#include +#include +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); } -simdjson_really_inline simdjson_result value::operator[](std::string_view key) noexcept { - return start_or_resume_object()[key]; +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); } -simdjson_really_inline simdjson_result value::operator[](const char *key) noexcept { - return start_or_resume_object()[key]; + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); } -simdjson_really_inline simdjson_result value::type() noexcept { - return iter.type(); + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } } -simdjson_really_inline std::string_view value::raw_json_token() noexcept { - return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); } -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson +#endif // SIMDJSON_THREADS_ENABLED -namespace simdjson { +simdjson_really_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) +simdjson_really_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif { } -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) + +simdjson_really_inline document_stream::~document_stream() noexcept { + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif } -simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { - if (error()) { return error(); } - return {}; +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field(key); +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); +simdjson_really_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { } -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { - if (error()) { return error(); } - return first[key]; +simdjson_really_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_really_inline simdjson_result simdjson_result::get_array() noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_really_inline simdjson_result simdjson_result::get_object() noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { - if (error()) { return error(); } - return first.get_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_really_inline bool simdjson_result::is_null() noexcept { - if (error()) { return false; } - return first.is_null(); +simdjson_really_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); } -template simdjson_really_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return first.get(); -} -template simdjson_really_inline error_code simdjson_result::get(T &out) noexcept { - if (error()) { return error(); } - return first.get(out); +simdjson_really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; } -template<> simdjson_really_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return std::move(first); -} -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &out) noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; +simdjson_really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; } -simdjson_really_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); +simdjson_really_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); } -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_really_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); } -simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED } -simdjson_really_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } } -simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -simdjson_really_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } } -#endif -simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_really_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; } -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/value-inl.h */ -/* begin file include/simdjson/generic/ondemand/field-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { +simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); -// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit -simdjson_really_inline field::field() noexcept : std::pair() {} + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: Remove any trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + } + cur_struct_index++; + } -simdjson_really_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ -} + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } -simdjson_really_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } -simdjson_really_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; } -simdjson_really_inline simdjson_warn_unused simdjson_result field::unescaped_key() noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.string_buf_loc()); - first.consume(); - return answer; -} +#ifdef SIMDJSON_THREADS_ENABLED -simdjson_really_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; -} +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } -simdjson_really_inline value &field::value() & noexcept { - return second; + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } } -simdjson_really_inline value field::value() && noexcept { - return std::forward(*this).second; +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); } +#endif // SIMDJSON_THREADS_ENABLED + } // namespace ondemand } // namespace SIMDJSON_BUILTIN_IMPLEMENTATION } // namespace simdjson namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value +simdjson_really_inline simdjson_result::simdjson_result( + error_code error ) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) + implementation_simdjson_result_base(error) { } -simdjson_really_inline simdjson_result::simdjson_result( - error_code error +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value ) noexcept : - implementation_simdjson_result_base(error) + implementation_simdjson_result_base( + std::forward(value) + ) { } -simdjson_really_inline simdjson_result simdjson_result::key() noexcept { - if (error()) { return error(); } - return first.key(); -} -simdjson_really_inline simdjson_result simdjson_result::unescaped_key() noexcept { - if (error()) { return error(); } - return first.unescaped_key(); -} -simdjson_really_inline simdjson_result simdjson_result::value() noexcept { - if (error()) { return error(); } - return std::move(first.value()); } +/* end file include/simdjson/generic/ondemand/document_stream-inl.h */ +/* begin file include/simdjson/generic/ondemand/serialization-inl.h */ + -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/field-inl.h */ -/* begin file include/simdjson/generic/ondemand/object-inl.h */ namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { -simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { return NO_SUCH_FIELD; } - return value(iter.child()); -} -simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { return NO_SUCH_FIELD; } - return value(iter.child()); -} -simdjson_really_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); -} -simdjson_really_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -simdjson_really_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { return NO_SUCH_FIELD; } - return value(iter.child()); + + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_really_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { return NO_SUCH_FIELD; } - return value(iter.child()); + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_really_inline simdjson_result object::start(value_iterator &iter) noexcept { - // We don't need to know if the object is empty to start iteration, but we do want to know if there - // is an error--thus `simdjson_unused`. - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_object().get(has_value) ); - return object(iter); +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } } -simdjson_really_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_root_object().get(has_value) ); - return object(iter); + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_really_inline object object::started(value_iterator &iter) noexcept { - simdjson_unused bool has_value = iter.started_object(); - return iter; + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_really_inline object object::resume(const value_iterator &iter) noexcept { - return iter; + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_really_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_really_inline simdjson_result object::begin() noexcept { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return object_iterator(iter); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_really_inline simdjson_result object::end() noexcept { - return object_iterator(iter); + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} } // namespace simdjson -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} -simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); -} -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; -} -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/object-inl.h */ -/* begin file include/simdjson/generic/ondemand/parser-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } - - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); -#endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); } else { - SIMDJSON_TRY( simdjson::active_implementation->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + return (out << error); } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; } +#endif -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), false) ); - return document::start({ reinterpret_cast(json.data()), this }); } - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), false) ); - return json_iterator(reinterpret_cast(json.data()), this); } +#endif -simdjson_really_inline size_t parser::capacity() const noexcept { - return _capacity; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_really_inline size_t parser::max_depth() const noexcept { - return _max_depth; +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/parser-inl.h */ +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +/* end file include/simdjson/generic/ondemand/serialization-inl.h */ /* end file include/simdjson/generic/ondemand-inl.h */