Skip to content

Commit a4cdb4e

Browse files
authored
Merge 3b3e4b4 into 4fc77b1
2 parents 4fc77b1 + 3b3e4b4 commit a4cdb4e

File tree

4 files changed

+269
-22
lines changed

4 files changed

+269
-22
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#include <benchmark/benchmark.h>
2+
3+
#include <util/random/random.h>
4+
#include <library/cpp/testing/unittest/registar.h>
5+
#include <library/cpp/json/json_value.h>
6+
#include <library/cpp/json/json_writer.h>
7+
8+
#include <ydb/library/binary_json/write.h>
9+
10+
// ya test -r -D BENCHMARK_MAKE_LARGE_PART
11+
#ifndef BENCHMARK_MAKE_LARGE_PART
12+
#define BENCHMARK_MAKE_LARGE_PART 0
13+
#endif
14+
15+
using namespace NKikimr::NBinaryJson;
16+
17+
namespace {
18+
19+
// Seed handed to NUnitTest::RandomString. GetTestJson() post-increments it for every
// string it generates, and GetTestJsonString() resets it to 42 before building a document.
static ui64 seed = 0;
20+
21+
// Builds a random JSON tree used as benchmark input.
//
// depth     - number of levels in the tree. A node with depth <= 1 is a leaf holding
//             a random 10-character string (depth == 0 is treated as a leaf so that
//             `depth - 1` can never wrap around).
// nChildren - number of keyed entries inserted into every non-leaf node; the same
//             fan-out is applied on every level of the tree.
//
// Every generated string consumes one value of the file-level `seed` counter.
NJson::TJsonValue GetTestJson(ui64 depth = 10, ui64 nChildren = 2) {
    NJson::TJsonValue value;
    if (depth <= 1) {
        value.SetValue(NUnitTest::RandomString(10, seed++));
        return value;
    }
    for (ui64 i = 0; i < nChildren; ++i) {
        // Generate the key in its own statement: the recursive call below also advances
        // `seed`, and the evaluation order of function arguments is unspecified, so doing
        // both inside one call expression would make the output compiler-dependent.
        const TString key = NUnitTest::RandomString(10, seed++);
        value.InsertValue(key, GetTestJson(depth - 1, nChildren));
    }
    return value;
}
32+
33+
// Produces the benchmark input as JSON text: resets the shared seed to 42, builds a
// tree with GetTestJson(2, 100) and serializes it with NJson::WriteJson.
TString GetTestJsonString() {
    seed = 42;
    const NJson::TJsonValue root = GetTestJson(/* depth */ 2, /* nChildren */ 100);
    return NJson::WriteJson(root);
}
37+
38+
// Benchmark body: repeatedly converts one pre-generated JSON text with
// SerializeToBinaryJson. The input is built once, outside the timed loop.
static void BenchWriteSimdJson(benchmark::State& state) {
    TString jsonText = GetTestJsonString();
    TStringBuf input(jsonText);
    for (auto _ : state) {
        auto binaryJson = SerializeToBinaryJson(input);
        // Keep the result observable so the conversion is not optimized away.
        benchmark::DoNotOptimize(binaryJson);
        benchmark::ClobberMemory();
    }
}
47+
48+
}
49+
50+
BENCHMARK(BenchWriteSimdJson);
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
G_BENCHMARK()
2+
3+
TAG(ya:fat)
4+
SIZE(LARGE)
5+
TIMEOUT(600)
6+
7+
IF (BENCHMARK_MAKE_LARGE_PART)
8+
CFLAGS(
9+
-DBENCHMARK_MAKE_LARGE_PART=1
10+
)
11+
TIMEOUT(1200)
12+
ENDIF()
13+
14+
SRCS(
15+
write.cpp
16+
)
17+
18+
PEERDIR(
19+
library/cpp/testing/unittest
20+
ydb/library/binary_json
21+
ydb/library/yql/public/udf/service/exception_policy
22+
)
23+
24+
YQL_LAST_ABI_VERSION()
25+
26+
END()

ydb/library/binary_json/write.cpp

Lines changed: 191 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
#include "write.h"
22

3+
#include <contrib/libs/simdjson/include/simdjson/dom/array-inl.h>
4+
#include <contrib/libs/simdjson/include/simdjson/dom/document-inl.h>
5+
#include <contrib/libs/simdjson/include/simdjson/dom/element-inl.h>
6+
#include <contrib/libs/simdjson/include/simdjson/dom/object-inl.h>
7+
#include <contrib/libs/simdjson/include/simdjson/dom/parser-inl.h>
8+
#include <contrib/libs/simdjson/include/simdjson/ondemand.h>
39
#include <library/cpp/json/json_reader.h>
410

511
#include <util/generic/vector.h>
@@ -74,38 +80,29 @@ struct TJsonIndex {
7480
// Returns the index assigned to the map key `value`, registering the key in `Keys`
// on first sight. Every call is counted in TotalKeysCount; TotalKeyLength grows by
// length + 1 only when the key is new.
ui32 InternKey(const TStringBuf value) {
    ++TotalKeysCount;

    // A single emplace does both the lookup and, if the key is absent, the insertion.
    const auto inserted = Keys.emplace(value, LastFreeStringIndex);
    const bool isNewKey = inserted.second;
    if (isNewKey) {
        TotalKeyLength += value.length() + 1;
        ++LastFreeStringIndex;
    }
    return inserted.first->second;
}
8790

8891
// Returns the index assigned to the string value `value`, registering it in `Strings`
// on first sight. TotalStringLength grows by length + 1 only when the string is new.
//
// String values live in `Strings`, not in `Keys`: inserting them into `Keys` would
// leave `Strings` empty and make a value that happens to equal an existing map key
// reuse that key's index without being accounted for in TotalStringLength.
ui32 InternString(const TStringBuf value) {
    // A single emplace does both the lookup and, if the string is absent, the insertion.
    const auto [it, emplaced] = Strings.emplace(value, LastFreeStringIndex);
    if (emplaced) {
        ++LastFreeStringIndex;
        TotalStringLength += value.length() + 1;
    }
    return it->second;
}
9999

100100
// Returns the index assigned to the number `value`, registering it in `Numbers`
// (and advancing LastFreeNumberIndex) the first time the value is seen.
ui32 InternNumber(double value) {
    const auto inserted = Numbers.emplace(value, LastFreeNumberIndex);
    const bool isNewNumber = inserted.second;
    if (isNewNumber) {
        ++LastFreeNumberIndex;
    }
    return inserted.first->second;
}
110107

111108
void AddContainer(EContainerType type) {
@@ -551,17 +548,189 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call
551548
}
552549
}
553550

551+
// unused, left for performance comparison
552+
template <typename TOnDemandValue>
553+
requires std::is_same_v<TOnDemandValue, simdjson::ondemand::value> || std::is_same_v<TOnDemandValue, simdjson::ondemand::document>
554+
[[maybe_unused]] [[nodiscard]] simdjson::error_code SimdJsonToJsonIndexImpl(TOnDemandValue& value, TBinaryJsonCallbacks& callbacks) {
555+
#define RETURN_IF_NOT_SUCCESS(error) \
556+
if (Y_UNLIKELY(error != simdjson::SUCCESS)) { \
557+
return error; \
558+
}
559+
560+
switch (value.type()) {
561+
case simdjson::ondemand::json_type::string: {
562+
std::string_view v;
563+
RETURN_IF_NOT_SUCCESS(value.get(v));
564+
callbacks.OnString(v);
565+
break;
566+
}
567+
case simdjson::ondemand::json_type::boolean: {
568+
bool v;
569+
RETURN_IF_NOT_SUCCESS(value.get(v));
570+
callbacks.OnBoolean(v);
571+
break;
572+
}
573+
case simdjson::ondemand::json_type::number: {
574+
switch (value.get_number_type()) {
575+
case simdjson::fallback::number_type::floating_point_number: {
576+
double v;
577+
RETURN_IF_NOT_SUCCESS(value.get(v));
578+
callbacks.OnDouble(v);
579+
break;
580+
}
581+
case simdjson::fallback::number_type::signed_integer: {
582+
i64 v;
583+
RETURN_IF_NOT_SUCCESS(value.get(v));
584+
callbacks.OnInteger(v);
585+
break;
586+
}
587+
case simdjson::fallback::number_type::unsigned_integer: {
588+
ui64 v;
589+
RETURN_IF_NOT_SUCCESS(value.get(v));
590+
callbacks.OnUInteger(v);
591+
break;
592+
}
593+
case simdjson::fallback::number_type::big_integer:
594+
return simdjson::NUMBER_OUT_OF_RANGE;
595+
}
596+
break;
597+
}
598+
case simdjson::ondemand::json_type::null:
599+
callbacks.OnNull();
600+
break;
601+
case simdjson::ondemand::json_type::array: {
602+
callbacks.OnOpenArray();
603+
604+
simdjson::ondemand::array v;
605+
RETURN_IF_NOT_SUCCESS(value.get(v));
606+
for (auto item : v) {
607+
RETURN_IF_NOT_SUCCESS(item.error());
608+
RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(item.value_unsafe(), callbacks));
609+
}
610+
611+
callbacks.OnCloseArray();
612+
break;
613+
}
614+
case simdjson::ondemand::json_type::object: {
615+
callbacks.OnOpenMap();
616+
617+
simdjson::ondemand::object v;
618+
RETURN_IF_NOT_SUCCESS(value.get(v));
619+
for (auto item : v) {
620+
RETURN_IF_NOT_SUCCESS(item.error());
621+
auto& keyValue = item.value_unsafe();
622+
const auto key = keyValue.unescaped_key();
623+
RETURN_IF_NOT_SUCCESS(key.error());
624+
callbacks.OnMapKey(key.value_unsafe());
625+
RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndexImpl(keyValue.value(), callbacks));
626+
}
627+
628+
callbacks.OnCloseMap();
629+
break;
630+
}
631+
}
632+
633+
return simdjson::SUCCESS;
634+
635+
#undef RETURN_IF_NOT_SUCCESS
554636
}
555637

556-
TMaybe<TBinaryJson> SerializeToBinaryJsonImpl(const TStringBuf json) {
638+
// Walks a parsed simdjson DOM element and replays it into `callbacks`: scalars are
// forwarded to the matching On* callback, arrays and objects are bracketed by
// OnOpen*/OnClose* with their children converted recursively (object keys are reported
// through OnMapKey before the corresponding value).
//
// Returns simdjson::SUCCESS when the whole subtree was replayed, otherwise the first
// simdjson error encountered.
[[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) {
// Evaluates `expr` exactly once and returns its status from the enclosing function on
// failure. The argument is often a recursive conversion that invokes callbacks, so it
// must not be re-evaluated just to produce the return value.
#define RETURN_IF_NOT_SUCCESS(expr)                                                                \
    if (const simdjson::error_code status_ = (expr); Y_UNLIKELY(status_ != simdjson::SUCCESS)) {   \
        return status_;                                                                            \
    }

    switch (value.type()) {
        case simdjson::dom::element_type::STRING: {
            std::string_view v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnString(v);
            break;
        }
        case simdjson::dom::element_type::BOOL: {
            bool v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnBoolean(v);
            break;
        }
        case simdjson::dom::element_type::INT64: {
            i64 v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnInteger(v);
            break;
        }
        case simdjson::dom::element_type::UINT64: {
            ui64 v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnUInteger(v);
            break;
        }
        case simdjson::dom::element_type::DOUBLE: {
            double v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnDouble(v);
            break;
        }
        case simdjson::dom::element_type::NULL_VALUE:
            callbacks.OnNull();
            break;
        case simdjson::dom::element_type::ARRAY: {
            callbacks.OnOpenArray();

            simdjson::dom::array v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            for (const auto& item : v) {
                RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item, callbacks));
            }

            callbacks.OnCloseArray();
            break;
        }
        case simdjson::dom::element_type::OBJECT: {
            callbacks.OnOpenMap();

            simdjson::dom::object v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            for (const auto& item : v) {
                callbacks.OnMapKey(item.key);
                RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value, callbacks));
            }

            callbacks.OnCloseMap();
            break;
        }
    }
    return simdjson::SUCCESS;
#undef RETURN_IF_NOT_SUCCESS
}
707+
}
708+
709+
// Converts JSON text to BinaryJson using ReadJson with TBinaryJsonCallbacks.
// Returns Nothing() when ReadJson reports failure.
TMaybe<TBinaryJson> SerializeToBinaryJsonImplRapidjson(const TStringBuf json) {
    TBinaryJsonCallbacks callbacks(/* throwException */ false);
    TMemoryInput input(json.data(), json.size());

    const bool parsed = ReadJson(&input, &callbacks);
    if (!parsed) {
        return Nothing();
    }

    // The callbacks object is consumed here: its accumulated result is moved into the serializer.
    TBinaryJsonSerializer serializer(std::move(callbacks).GetResult());
    return std::move(serializer).Serialize();
}
564718

719+
// Converts JSON text to BinaryJson using the simdjson DOM parser.
//
// Parse failures yield Nothing(), except simdjson::BIGINT_ERROR, for which the input is
// handed to SerializeToBinaryJsonImplRapidjson instead. A failure while replaying the
// parsed document into the callbacks also yields Nothing().
TMaybe<TBinaryJson> SerializeToBinaryJsonImpl(const TStringBuf json) {
    // One parser per thread, reused across calls.
    thread_local simdjson::dom::parser parser;
    auto doc = parser.parse(json);

    const simdjson::error_code parseStatus = doc.error();
    if (parseStatus == simdjson::BIGINT_ERROR) {
        return SerializeToBinaryJsonImplRapidjson(json);
    }
    if (parseStatus != simdjson::SUCCESS) {
        return Nothing();
    }

    TBinaryJsonCallbacks callbacks(/* throwException */ false);
    const simdjson::error_code convertStatus = SimdJsonToJsonIndex(doc.value(), callbacks);
    if (convertStatus != simdjson::SUCCESS) {
        return Nothing();
    }

    // The callbacks object is consumed here: its accumulated result is moved into the serializer.
    TBinaryJsonSerializer serializer(std::move(callbacks).GetResult());
    return std::move(serializer).Serialize();
}
566735

567736
TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json) {

ydb/library/binary_json/ya.make

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ YQL_ABI_VERSION(
99
PEERDIR(
1010
library/cpp/json
1111
ydb/library/yql/minikql/dom
12+
contrib/libs/simdjson
1213
)
1314

1415
SRCS(
@@ -23,4 +24,5 @@ END()
2324

2425
RECURSE_FOR_TESTS(
2526
ut
27+
ut_benchmark
2628
)

0 commit comments

Comments
 (0)