|
1 | 1 | #include "write.h" |
2 | 2 |
|
| 3 | +#include <contrib/libs/simdjson/include/simdjson/dom/array-inl.h> |
| 4 | +#include <contrib/libs/simdjson/include/simdjson/dom/document-inl.h> |
| 5 | +#include <contrib/libs/simdjson/include/simdjson/dom/element-inl.h> |
| 6 | +#include <contrib/libs/simdjson/include/simdjson/dom/object-inl.h> |
| 7 | +#include <contrib/libs/simdjson/include/simdjson/dom/parser-inl.h> |
3 | 8 | #include <library/cpp/json/json_reader.h> |
4 | 9 |
|
5 | 10 | #include <util/generic/vector.h> |
@@ -74,38 +79,29 @@ struct TJsonIndex { |
74 | 79 | ui32 InternKey(const TStringBuf value) { |
75 | 80 | TotalKeysCount++; |
76 | 81 |
|
77 | | - const auto it = Keys.find(value); |
78 | | - if (it == Keys.end()) { |
79 | | - const ui32 currentIndex = LastFreeStringIndex++; |
80 | | - Keys[TString(value)] = currentIndex; |
| 82 | + const auto [it, emplaced] = Keys.emplace(value, LastFreeStringIndex); |
| 83 | + if (emplaced) { |
| 84 | + ++LastFreeStringIndex; |
81 | 85 | TotalKeyLength += value.length() + 1; |
82 | | - return currentIndex; |
83 | | - } else { |
84 | | - return it->second; |
85 | 86 | } |
| 87 | + return it->second; |
86 | 88 | } |
87 | 89 |
|
88 | 90 | ui32 InternString(const TStringBuf value) { |
89 | | - const auto it = Strings.find(value); |
90 | | - if (it == Strings.end()) { |
91 | | - const ui32 currentIndex = LastFreeStringIndex++; |
92 | | - Strings[value] = currentIndex; |
| 91 | + const auto [it, emplaced] = Keys.emplace(value, LastFreeStringIndex); |
| 92 | + if (emplaced) { |
| 93 | + ++LastFreeStringIndex; |
93 | 94 | TotalStringLength += value.length() + 1; |
94 | | - return currentIndex; |
95 | | - } else { |
96 | | - return it->second; |
97 | 95 | } |
| 96 | + return it->second; |
98 | 97 | } |
99 | 98 |
|
100 | 99 | ui32 InternNumber(double value) { |
101 | | - const auto it = Numbers.find(value); |
102 | | - if (it == Numbers.end()) { |
103 | | - const ui32 currentIndex = LastFreeNumberIndex++; |
104 | | - Numbers[value] = currentIndex; |
105 | | - return currentIndex; |
106 | | - } else { |
107 | | - return it->second; |
| 100 | + const auto [it, emplaced] = Numbers.emplace(value, LastFreeNumberIndex); |
| 101 | + if (emplaced) { |
| 102 | + ++LastFreeNumberIndex; |
108 | 103 | } |
| 104 | + return it->second; |
109 | 105 | } |
110 | 106 |
|
111 | 107 | void AddContainer(EContainerType type) { |
@@ -551,17 +547,89 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call |
551 | 547 | } |
552 | 548 | } |
553 | 549 |
|
| 550 | +[[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) { |
| 551 | +#define RETURN_IF_NOT_SUCCESS(status) \ |
| 552 | + if (Y_UNLIKELY(status != simdjson::SUCCESS)) { \ |
| 553 | + return status; \ |
| 554 | + } |
| 555 | + |
| 556 | + switch (value.type()) { |
| 557 | + case simdjson::dom::element_type::STRING: { |
| 558 | + std::string_view v; |
| 559 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 560 | + callbacks.OnString(v); |
| 561 | + break; |
| 562 | + } |
| 563 | + case simdjson::dom::element_type::BOOL: { |
| 564 | + bool v; |
| 565 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 566 | + callbacks.OnBoolean(v); |
| 567 | + break; |
| 568 | + } |
| 569 | + case simdjson::dom::element_type::INT64: { |
| 570 | + i64 v; |
| 571 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 572 | + callbacks.OnInteger(v); |
| 573 | + break; |
| 574 | + } |
| 575 | + case simdjson::dom::element_type::UINT64: { |
| 576 | + ui64 v; |
| 577 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 578 | + callbacks.OnUInteger(v); |
| 579 | + break; |
| 580 | + } |
| 581 | + case simdjson::dom::element_type::DOUBLE: { |
| 582 | + double v; |
| 583 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 584 | + callbacks.OnDouble(v); |
| 585 | + break; |
| 586 | + } |
| 587 | + case simdjson::dom::element_type::NULL_VALUE: |
| 588 | + callbacks.OnNull(); |
| 589 | + break; |
| 590 | + case simdjson::dom::element_type::ARRAY: { |
| 591 | + callbacks.OnOpenArray(); |
| 592 | + |
| 593 | + simdjson::dom::array v; |
| 594 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 595 | + for (const auto& item : v) { |
| 596 | + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item, callbacks)); |
| 597 | + } |
| 598 | + |
| 599 | + callbacks.OnCloseArray(); |
| 600 | + break; |
| 601 | + } |
| 602 | + case simdjson::dom::element_type::OBJECT: { |
| 603 | + callbacks.OnOpenMap(); |
| 604 | + |
| 605 | + simdjson::dom::object v; |
| 606 | + RETURN_IF_NOT_SUCCESS(value.get(v)); |
| 607 | + for (const auto& item : v) { |
| 608 | + callbacks.OnMapKey(item.key); |
| 609 | + RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value, callbacks)); |
| 610 | + } |
| 611 | + |
| 612 | + callbacks.OnCloseMap(); |
| 613 | + break; |
| 614 | + } |
| 615 | + } |
| 616 | + return simdjson::SUCCESS; |
| 617 | +#undef RETURN_IF_NOT_SUCCESS |
| 618 | +} |
554 | 619 | } |
555 | 620 |
|
556 | 621 | TMaybe<TBinaryJson> SerializeToBinaryJsonImpl(const TStringBuf json) { |
557 | | - TMemoryInput input(json.data(), json.size()); |
| 622 | + simdjson::dom::parser parser; |
| 623 | + auto doc = parser.parse(json); |
| 624 | + if (doc.error() != simdjson::SUCCESS) { |
| 625 | + return Nothing(); |
| 626 | + } |
558 | 627 | TBinaryJsonCallbacks callbacks(/* throwException */ false); |
559 | | - if (!ReadJson(&input, &callbacks)) { |
| 628 | + if (SimdJsonToJsonIndex(doc.value(), callbacks) != simdjson::SUCCESS) { |
560 | 629 | return Nothing(); |
561 | 630 | } |
562 | 631 | TBinaryJsonSerializer serializer(std::move(callbacks).GetResult()); |
563 | 632 | return std::move(serializer).Serialize(); |
564 | | - |
565 | 633 | } |
566 | 634 |
|
567 | 635 | TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json) { |
|
0 commit comments