|
5 | 5 | #include <contrib/libs/simdjson/include/simdjson/dom/element-inl.h> |
6 | 6 | #include <contrib/libs/simdjson/include/simdjson/dom/object-inl.h> |
7 | 7 | #include <contrib/libs/simdjson/include/simdjson/dom/parser-inl.h> |
| 8 | +#include <contrib/libs/simdjson/include/simdjson/ondemand.h> |
8 | 9 | #include <library/cpp/json/json_reader.h> |
9 | 10 |
|
10 | 11 | #include <util/generic/vector.h> |
@@ -547,6 +548,93 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call |
547 | 548 | } |
548 | 549 | } |
549 | 550 |
|
// Walks a simdjson On Demand value and replays it as TBinaryJsonCallbacks events.
// The requires-clause limits TOnDemandValue to simdjson::ondemand::value and
// simdjson::ondemand::document.
// Scalars map to OnString / OnBoolean / OnDouble / OnInteger / OnUInteger / OnNull.
// Arrays are bracketed by OnOpenArray / OnCloseArray and objects by OnOpenMap / OnCloseMap,
// with one recursive call per element (objects emit OnMapKey before each field value).
// Returns simdjson::SUCCESS, or the first non-SUCCESS error_code returned by a checked
// simdjson accessor; a big_integer number is reported as simdjson::NUMBER_OUT_OF_RANGE.
| 551 | +// unused, left for performance comparison |
| 552 | +template <typename TOnDemandValue> |
| 553 | + requires std::is_same_v<TOnDemandValue, simdjson::ondemand::value> || std::is_same_v<TOnDemandValue, simdjson::ondemand::document> |
| 554 | +[[maybe_unused]] [[nodiscard]] simdjson::error_code SimdJsonToJsonIndexImpl(TOnDemandValue& value, TBinaryJsonCallbacks& callbacks) { |
// Function-local helper macro: returns the given error code to the caller unless it is
// simdjson::SUCCESS. It is #undef'd again at the end of this function.
| 555 | +#define RETURN_IF_NOT_SUCCESS(error) \ |
| 556 | + if (Y_UNLIKELY(error != simdjson::SUCCESS)) { \ |
| 557 | + return error; \ |
| 558 | + } |
| 559 | + |
// NOTE(review): the result of value.type() is switched on directly and is not passed
// through RETURN_IF_NOT_SUCCESS; presumably it is a simdjson_result wrapper — confirm how
// a failure of type() surfaces here (same question for get_number_type() below).
| 560 | + switch (value.type()) { |
| 561 | + case simdjson::ondemand::json_type::string: { |
| 562 | + std::string_view v; |
| 563 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 564 | + callbacks.OnString(v); |
| 565 | + break; |
| 566 | + } |
| 567 | + case simdjson::ondemand::json_type::boolean: { |
| 568 | + bool v; |
| 569 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 570 | + callbacks.OnBoolean(v); |
| 571 | + break; |
| 572 | + } |
// Numbers are split by get_number_type() so that each kind is read into the matching
// C++ type (double / i64 / ui64) and forwarded to the matching callback.
| 573 | + case simdjson::ondemand::json_type::number: { |
| 574 | + switch (value.get_number_type()) { |
// NOTE(review): these case labels are spelled simdjson::fallback::number_type while the
// rest of the function uses simdjson::ondemand:: names — confirm they refer to the same
// enum for the simdjson implementation selected at build time.
| 575 | + case simdjson::fallback::number_type::floating_point_number: { |
| 576 | + double v; |
| 577 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 578 | + callbacks.OnDouble(v); |
| 579 | + break; |
| 580 | + } |
| 581 | + case simdjson::fallback::number_type::signed_integer: { |
| 582 | + i64 v; |
| 583 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 584 | + callbacks.OnInteger(v); |
| 585 | + break; |
| 586 | + } |
| 587 | + case simdjson::fallback::number_type::unsigned_integer: { |
| 588 | + ui64 v; |
| 589 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 590 | + callbacks.OnUInteger(v); |
| 591 | + break; |
| 592 | + } |
// Big integers are not forwarded to any callback; the function fails instead.
| 593 | + case simdjson::fallback::number_type::big_integer: |
| 594 | + return simdjson::NUMBER_OUT_OF_RANGE; |
| 595 | + } |
| 596 | + break; |
| 597 | + } |
| 598 | + case simdjson::ondemand::json_type::null: |
| 599 | + callbacks.OnNull(); |
| 600 | + break; |
// The open event is emitted before value.get(v) is checked, so any early error return
// below leaves callbacks without a matching OnCloseArray.
| 601 | + case simdjson::ondemand::json_type::array: { |
| 602 | + callbacks.OnOpenArray(); |
| 603 | + |
| 604 | + simdjson::ondemand::array v; |
| 605 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
// Each iteration yields a result wrapper: its error is checked first, and only then is
// the element unwrapped with value_unsafe() and converted recursively.
| 606 | + for (auto item : v) { |
| 607 | + RETURN_IF_NOT_SUCCESS(item.error()); |
| 608 | + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item.value_unsafe(), callbacks)); |
| 609 | + } |
| 610 | + |
| 611 | + callbacks.OnCloseArray(); |
| 612 | + break; |
| 613 | + } |
// Same pattern as the array case: OnOpenMap is emitted before value.get(v) is checked,
// so an early error return leaves no matching OnCloseMap.
| 614 | + case simdjson::ondemand::json_type::object: { |
| 615 | + callbacks.OnOpenMap(); |
| 616 | + |
| 617 | + simdjson::ondemand::object v; |
| 618 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
// For every field: check the iteration result, obtain the key via unescaped_key(),
// emit OnMapKey, then recurse into the field's value.
| 619 | + for (auto item : v) { |
| 620 | + RETURN_IF_NOT_SUCCESS(item.error()); |
| 621 | + auto& keyValue = item.value_unsafe(); |
| 622 | + const auto key = keyValue.unescaped_key(); |
| 623 | + RETURN_IF_NOT_SUCCESS(key.error()); |
| 624 | + callbacks.OnMapKey(key.value_unsafe()); |
| 625 | + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(keyValue.value(), callbacks)); |
| 626 | + } |
| 627 | + |
| 628 | + callbacks.OnCloseMap(); |
| 629 | + break; |
| 630 | + } |
| 631 | + } |
| 632 | + |
// Reached only when every branch above completed without an early error return.
| 633 | + return simdjson::SUCCESS; |
| 634 | + |
| 635 | +#undef RETURN_IF_NOT_SUCCESS |
| 636 | +} |
| 637 | + |
550 | 638 | [[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) { |
551 | 639 | #define RETURN_IF_NOT_SUCCESS(status) \ |
552 | 640 | if (Y_UNLIKELY(status != simdjson::SUCCESS)) { \ |
@@ -618,10 +706,23 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call |
618 | 706 | } |
619 | 707 | } |
620 | 708 |
|
// Serializes the JSON text in `json` to TBinaryJson by running ReadJson over it with
// TBinaryJsonCallbacks as the event sink, then serializing the collected result.
// Returns Nothing() when ReadJson reports failure.
// SerializeToBinaryJsonImpl calls this as a fallback when simdjson's DOM parser fails with
// simdjson::BIGINT_ERROR.
// NOTE(review): presumably ReadJson is the reader from library/cpp/json/json_reader.h and
// is RapidJSON-backed, as the function name suggests — confirm.
| 709 | +TMaybe<TBinaryJson> SerializeToBinaryJsonImplRapidjson(const TStringBuf json) { |
// Wrap the input buffer in a stream over json.data() / json.size() for ReadJson.
| 710 | + TMemoryInput input(json.data(), json.size()); |
| 711 | + TBinaryJsonCallbacks callbacks(/* throwException */ false); |
| 712 | + if (!ReadJson(&input, &callbacks)) { |
| 713 | + return Nothing(); |
| 714 | + } |
// The callbacks object is moved from here and must not be used afterwards.
| 715 | + TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); |
| 716 | + return std::move(serializer).Serialize(); |
| 717 | +} |
| 718 | + |
621 | 719 | TMaybe<TBinaryJson> SerializeToBinaryJsonImpl(const TStringBuf json) { |
622 | | - simdjson::dom::parser parser; |
| 720 | + thread_local simdjson::dom::parser parser; |
623 | 721 | auto doc = parser.parse(json); |
624 | 722 | if (doc.error() != simdjson::SUCCESS) { |
| 723 | + if (doc.error() == simdjson::BIGINT_ERROR) { |
| 724 | + return SerializeToBinaryJsonImplRapidjson(json); |
| 725 | + } |
625 | 726 | return Nothing(); |
626 | 727 | } |
627 | 728 | TBinaryJsonCallbacks callbacks(/* throwException */ false); |
|
0 commit comments