|
1 | 1 | #include "write.h" |
2 | 2 |
|
| 3 | +#include <contrib/libs/simdjson/include/simdjson/dom/array-inl.h> |
| 4 | +#include <contrib/libs/simdjson/include/simdjson/dom/document-inl.h> |
| 5 | +#include <contrib/libs/simdjson/include/simdjson/dom/element-inl.h> |
| 6 | +#include <contrib/libs/simdjson/include/simdjson/dom/object-inl.h> |
| 7 | +#include <contrib/libs/simdjson/include/simdjson/dom/parser-inl.h> |
| 8 | +#include <contrib/libs/simdjson/include/simdjson/ondemand.h> |
3 | 9 | #include <library/cpp/json/json_reader.h> |
4 | 10 |
|
5 | 11 | #include <util/generic/vector.h> |
@@ -74,38 +80,29 @@ struct TJsonIndex { |
74 | 80 | ui32 InternKey(const TStringBuf value) { |
75 | 81 | TotalKeysCount++; |
76 | 82 |
|
77 | | - const auto it = Keys.find(value); |
78 | | - if (it == Keys.end()) { |
79 | | - const ui32 currentIndex = LastFreeStringIndex++; |
80 | | - Keys[TString(value)] = currentIndex; |
| 83 | + const auto [it, emplaced] = Keys.emplace(value, LastFreeStringIndex); |
| 84 | + if (emplaced) { |
| 85 | + ++LastFreeStringIndex; |
81 | 86 | TotalKeyLength += value.length() + 1; |
82 | | - return currentIndex; |
83 | | - } else { |
84 | | - return it->second; |
85 | 87 | } |
| 88 | + return it->second; |
86 | 89 | } |
87 | 90 |
|
88 | 91 | ui32 InternString(const TStringBuf value) { |
89 | | - const auto it = Strings.find(value); |
90 | | - if (it == Strings.end()) { |
91 | | - const ui32 currentIndex = LastFreeStringIndex++; |
92 | | - Strings[value] = currentIndex; |
| 92 | + const auto [it, emplaced] = Keys.emplace(value, LastFreeStringIndex); |
| 93 | + if (emplaced) { |
| 94 | + ++LastFreeStringIndex; |
93 | 95 | TotalStringLength += value.length() + 1; |
94 | | - return currentIndex; |
95 | | - } else { |
96 | | - return it->second; |
97 | 96 | } |
| 97 | + return it->second; |
98 | 98 | } |
99 | 99 |
|
100 | 100 | ui32 InternNumber(double value) { |
101 | | - const auto it = Numbers.find(value); |
102 | | - if (it == Numbers.end()) { |
103 | | - const ui32 currentIndex = LastFreeNumberIndex++; |
104 | | - Numbers[value] = currentIndex; |
105 | | - return currentIndex; |
106 | | - } else { |
107 | | - return it->second; |
| 101 | + const auto [it, emplaced] = Numbers.emplace(value, LastFreeNumberIndex); |
| 102 | + if (emplaced) { |
| 103 | + ++LastFreeNumberIndex; |
108 | 104 | } |
| 105 | + return it->second; |
109 | 106 | } |
110 | 107 |
|
111 | 108 | void AddContainer(EContainerType type) { |
@@ -551,17 +548,189 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call |
551 | 548 | } |
552 | 549 | } |
553 | 550 |
|
| 551 | +// unused, left for performance comparison |
| 552 | +template <typename TOnDemandValue> |
| 553 | + requires std::is_same_v<TOnDemandValue, simdjson::ondemand::value> || std::is_same_v<TOnDemandValue, simdjson::ondemand::document> |
| 554 | +[[maybe_unused]] [[nodiscard]] simdjson::error_code SimdJsonToJsonIndexImpl(TOnDemandValue& value, TBinaryJsonCallbacks& callbacks) { |
| 555 | +#define RETURN_IF_NOT_SUCCESS(error) \ |
| 556 | + if (Y_UNLIKELY(error != simdjson::SUCCESS)) { \ |
| 557 | + return error; \ |
| 558 | + } |
| 559 | + |
| 560 | + switch (value.type()) { |
| 561 | + case simdjson::ondemand::json_type::string: { |
| 562 | + std::string_view v; |
| 563 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 564 | + callbacks.OnString(v); |
| 565 | + break; |
| 566 | + } |
| 567 | + case simdjson::ondemand::json_type::boolean: { |
| 568 | + bool v; |
| 569 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 570 | + callbacks.OnBoolean(v); |
| 571 | + break; |
| 572 | + } |
| 573 | + case simdjson::ondemand::json_type::number: { |
| 574 | + switch (value.get_number_type()) { |
| 575 | + case simdjson::fallback::number_type::floating_point_number: { |
| 576 | + double v; |
| 577 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 578 | + callbacks.OnDouble(v); |
| 579 | + break; |
| 580 | + } |
| 581 | + case simdjson::fallback::number_type::signed_integer: { |
| 582 | + i64 v; |
| 583 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 584 | + callbacks.OnInteger(v); |
| 585 | + break; |
| 586 | + } |
| 587 | + case simdjson::fallback::number_type::unsigned_integer: { |
| 588 | + ui64 v; |
| 589 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 590 | + callbacks.OnUInteger(v); |
| 591 | + break; |
| 592 | + } |
| 593 | + case simdjson::fallback::number_type::big_integer: |
| 594 | + return simdjson::NUMBER_OUT_OF_RANGE; |
| 595 | + } |
| 596 | + break; |
| 597 | + } |
| 598 | + case simdjson::ondemand::json_type::null: |
| 599 | + callbacks.OnNull(); |
| 600 | + break; |
| 601 | + case simdjson::ondemand::json_type::array: { |
| 602 | + callbacks.OnOpenArray(); |
| 603 | + |
| 604 | + simdjson::ondemand::array v; |
| 605 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 606 | + for (auto item : v) { |
| 607 | + RETURN_IF_NOT_SUCCESS(item.error()); |
| 608 | + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item.value_unsafe(), callbacks)); |
| 609 | + } |
| 610 | + |
| 611 | + callbacks.OnCloseArray(); |
| 612 | + break; |
| 613 | + } |
| 614 | + case simdjson::ondemand::json_type::object: { |
| 615 | + callbacks.OnOpenMap(); |
| 616 | + |
| 617 | + simdjson::ondemand::object v; |
| 618 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 619 | + for (auto item : v) { |
| 620 | + RETURN_IF_NOT_SUCCESS(item.error()); |
| 621 | + auto& keyValue = item.value_unsafe(); |
| 622 | + const auto key = keyValue.unescaped_key(); |
| 623 | + RETURN_IF_NOT_SUCCESS(key.error()); |
| 624 | + callbacks.OnMapKey(key.value_unsafe()); |
| 625 | + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(keyValue.value(), callbacks)); |
| 626 | + } |
| 627 | + |
| 628 | + callbacks.OnCloseMap(); |
| 629 | + break; |
| 630 | + } |
| 631 | + } |
| 632 | + |
| 633 | + return simdjson::SUCCESS; |
| 634 | + |
| 635 | +#undef RETURN_IF_NOT_SUCCESS |
554 | 636 | } |
555 | 637 |
|
556 | | -TMaybe<TBinaryJson> SerializeToBinaryJsonImpl(const TStringBuf json) { |
| 638 | +[[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) { |
| 639 | +#define RETURN_IF_NOT_SUCCESS(status) \ |
| 640 | + if (Y_UNLIKELY(status != simdjson::SUCCESS)) { \ |
| 641 | + return status; \ |
| 642 | + } |
| 643 | + |
| 644 | + switch (value.type()) { |
| 645 | + case simdjson::dom::element_type::STRING: { |
| 646 | + std::string_view v; |
| 647 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 648 | + callbacks.OnString(v); |
| 649 | + break; |
| 650 | + } |
| 651 | + case simdjson::dom::element_type::BOOL: { |
| 652 | + bool v; |
| 653 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 654 | + callbacks.OnBoolean(v); |
| 655 | + break; |
| 656 | + } |
| 657 | + case simdjson::dom::element_type::INT64: { |
| 658 | + i64 v; |
| 659 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 660 | + callbacks.OnInteger(v); |
| 661 | + break; |
| 662 | + } |
| 663 | + case simdjson::dom::element_type::UINT64: { |
| 664 | + ui64 v; |
| 665 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 666 | + callbacks.OnUInteger(v); |
| 667 | + break; |
| 668 | + } |
| 669 | + case simdjson::dom::element_type::DOUBLE: { |
| 670 | + double v; |
| 671 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 672 | + callbacks.OnDouble(v); |
| 673 | + break; |
| 674 | + } |
| 675 | + case simdjson::dom::element_type::NULL_VALUE: |
| 676 | + callbacks.OnNull(); |
| 677 | + break; |
| 678 | + case simdjson::dom::element_type::ARRAY: { |
| 679 | + callbacks.OnOpenArray(); |
| 680 | + |
| 681 | + simdjson::dom::array v; |
| 682 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 683 | + for (const auto& item : v) { |
| 684 | + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item, callbacks)); |
| 685 | + } |
| 686 | + |
| 687 | + callbacks.OnCloseArray(); |
| 688 | + break; |
| 689 | + } |
| 690 | + case simdjson::dom::element_type::OBJECT: { |
| 691 | + callbacks.OnOpenMap(); |
| 692 | + |
| 693 | + simdjson::dom::object v; |
| 694 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 695 | + for (const auto& item : v) { |
| 696 | + callbacks.OnMapKey(item.key); |
| 697 | + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value, callbacks)); |
| 698 | + } |
| 699 | + |
| 700 | + callbacks.OnCloseMap(); |
| 701 | + break; |
| 702 | + } |
| 703 | + } |
| 704 | + return simdjson::SUCCESS; |
| 705 | +#undef RETURN_IF_NOT_SUCCESS |
| 706 | +} |
| 707 | +} |
| 708 | + |
| 709 | +TMaybe<TBinaryJson> SerializeToBinaryJsonImplRapidjson(const TStringBuf json) { |
557 | 710 | TMemoryInput input(json.data(), json.size()); |
558 | 711 | TBinaryJsonCallbacks callbacks(/* throwException */ false); |
559 | 712 | if (!ReadJson(&input, &callbacks)) { |
560 | 713 | return Nothing(); |
561 | 714 | } |
562 | 715 | TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); |
563 | 716 | return std::move(serializer).Serialize(); |
| 717 | +} |
564 | 718 |
|
| 719 | +TMaybe<TBinaryJson> SerializeToBinaryJsonImpl(const TStringBuf json) { |
| 720 | + thread_local simdjson::dom::parser parser; |
| 721 | + auto doc = parser.parse(json); |
| 722 | + if (doc.error() != simdjson::SUCCESS) { |
| 723 | + if (doc.error() == simdjson::BIGINT_ERROR) { |
| 724 | + return SerializeToBinaryJsonImplRapidjson(json); |
| 725 | + } |
| 726 | + return Nothing(); |
| 727 | + } |
| 728 | + TBinaryJsonCallbacks callbacks(/* throwException */ false); |
| 729 | + if (SimdJsonToJsonIndex(doc.value(), callbacks) != simdjson::SUCCESS) { |
| 730 | + return Nothing(); |
| 731 | + } |
| 732 | + TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); |
| 733 | + return std::move(serializer).Serialize(); |
565 | 734 | } |
566 | 735 |
|
567 | 736 | TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json) { |
|
0 commit comments