Skip to content

Commit d98dfdd

Browse files
authored
Merge 38313d5 into 0be743e
2 parents 0be743e + 38313d5 commit d98dfdd

File tree

4 files changed

+169
-24
lines changed

4 files changed

+169
-24
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#include <benchmark/benchmark.h>
2+
3+
#include <util/random/random.h>
4+
#include <library/cpp/testing/unittest/registar.h>
5+
#include <library/cpp/json/json_value.h>
6+
#include <library/cpp/json/json_writer.h>
7+
8+
#include <ydb/library/binary_json/write.h>
9+
10+
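// Run with: ya test -r -D BENCHMARK_MAKE_LARGE_PART
// (with that flag set, ya.make adds -DBENCHMARK_MAKE_LARGE_PART=1 to CFLAGS and raises the timeout)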
11+
#ifndef BENCHMARK_MAKE_LARGE_PART
12+
#define BENCHMARK_MAKE_LARGE_PART 0
13+
#endif
14+
15+
using namespace NKikimr::NBinaryJson;
16+
17+
namespace {
18+
19+
static ui64 seed = 0;
20+
21+
// Builds a random JSON tree used as benchmark input.
//
// depth     - number of levels in the tree; 1 (or 0) yields a single string leaf.
// nChildren - number of keys inserted at every non-leaf level.
//
// Keys and leaf values come from NUnitTest::RandomString(10, seed), and the file-level
// `seed` counter is advanced once per generated string.
NJson::TJsonValue GetTestJson(ui64 depth = 10, ui64 nChildren = 2) {
    NJson::TJsonValue value;
    // `<= 1` rather than `== 1`: depth is unsigned, so depth == 0 would wrap around in
    // `depth - 1` below and recurse without bound.
    if (depth <= 1) {
        value.SetValue(NUnitTest::RandomString(10, seed++));
        return value;
    }
    for (ui64 i = 0; i < nChildren; ++i) {
        // Forward nChildren so the requested fan-out applies to every level, not only the root.
        value.InsertValue(NUnitTest::RandomString(10, seed++), GetTestJson(depth - 1, nChildren));
    }
    return value;
}
32+
33+
// Returns the JSON text fed to the benchmark: resets the shared seed to 42 and
// serializes GetTestJson(2, 100) with NJson::WriteJson.
TString GetTestJsonString() {
    seed = 42;
    const NJson::TJsonValue tree = GetTestJson(2, 100);
    return NJson::WriteJson(tree);
}
37+
38+
// Benchmark: repeatedly runs SerializeToBinaryJson over the text from GetTestJsonString().
static void BenchWriteSimdJson(benchmark::State& state) {
    // The input is generated once, outside the measured loop.
    TString jsonText = GetTestJsonString();
    TStringBuf input(jsonText);

    for (auto _ : state) {
        auto serialized = SerializeToBinaryJson(input);
        // Keep the result observable so the call is not optimized away.
        benchmark::DoNotOptimize(serialized);
        benchmark::ClobberMemory();
    }
}
47+
48+
}
49+
50+
BENCHMARK(BenchWriteSimdJson);
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
G_BENCHMARK()
2+
3+
TAG(ya:fat)
4+
SIZE(LARGE)
5+
TIMEOUT(600)
6+
7+
IF (BENCHMARK_MAKE_LARGE_PART)
8+
CFLAGS(
9+
-DBENCHMARK_MAKE_LARGE_PART=1
10+
)
11+
TIMEOUT(1200)
12+
ENDIF()
13+
14+
SRCS(
15+
write.cpp
16+
)
17+
18+
PEERDIR(
19+
library/cpp/testing/unittest
20+
ydb/library/binary_json
21+
)
22+
23+
YQL_LAST_ABI_VERSION()
24+
25+
END()

ydb/library/binary_json/write.cpp

Lines changed: 92 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
#include "write.h"
22

3+
#include <contrib/libs/simdjson/include/simdjson/dom/array-inl.h>
4+
#include <contrib/libs/simdjson/include/simdjson/dom/document-inl.h>
5+
#include <contrib/libs/simdjson/include/simdjson/dom/element-inl.h>
6+
#include <contrib/libs/simdjson/include/simdjson/dom/object-inl.h>
7+
#include <contrib/libs/simdjson/include/simdjson/dom/parser-inl.h>
38
#include <library/cpp/json/json_reader.h>
49

510
#include <util/generic/vector.h>
@@ -74,38 +79,29 @@ struct TJsonIndex {
7479
// Returns the index associated with the given map key, registering the key on first use.
// Every call increments TotalKeysCount; only a key not yet present in Keys consumes the
// next LastFreeStringIndex and adds (length + 1) to TotalKeyLength.
ui32 InternKey(const TStringBuf value) {
    ++TotalKeysCount;

    const auto inserted = Keys.emplace(value, LastFreeStringIndex);
    const bool isNewKey = inserted.second;
    if (isNewKey) {
        // The index stored by emplace is now taken; advance to the next free one.
        ++LastFreeStringIndex;
        TotalKeyLength += value.length() + 1;
    }
    return inserted.first->second;
}
8789

8890
// Returns the index associated with the given string value, registering it on first use.
// Only a value not yet present in Strings consumes the next LastFreeStringIndex and adds
// (length + 1) to TotalStringLength.
ui32 InternString(const TStringBuf value) {
    // String values are tracked in Strings, not in Keys: inserting them into Keys would
    // leave Strings without the value while TotalStringLength still accounts for it.
    const auto [it, emplaced] = Strings.emplace(value, LastFreeStringIndex);
    if (emplaced) {
        ++LastFreeStringIndex;
        TotalStringLength += value.length() + 1;
    }
    return it->second;
}
9998

10099
// Returns the index associated with the given number, registering it on first use.
// A number not yet present in Numbers consumes the next LastFreeNumberIndex.
ui32 InternNumber(double value) {
    const auto inserted = Numbers.emplace(value, LastFreeNumberIndex);
    if (inserted.second) {
        // The index stored by emplace is now taken; advance to the next free one.
        ++LastFreeNumberIndex;
    }
    return inserted.first->second;
}
110106

111107
void AddContainer(EContainerType type) {
@@ -551,17 +547,89 @@ void DomToJsonIndex(const NUdf::TUnboxedValue& value, TBinaryJsonCallbacks& call
551547
}
552548
}
553549

550+
// Walks a simdjson DOM element recursively and reports it to `callbacks`:
// scalars go to OnString/OnBoolean/OnInteger/OnUInteger/OnDouble/OnNull, arrays are
// wrapped in OnOpenArray/OnCloseArray, and objects in OnOpenMap/OnCloseMap with
// OnMapKey before each member value.
//
// Returns simdjson::SUCCESS when the whole subtree was reported, otherwise the first
// error code returned by an element accessor or by a nested call.
[[nodiscard]] simdjson::error_code SimdJsonToJsonIndex(const simdjson::dom::element& value, TBinaryJsonCallbacks& callbacks) {
// The expression is evaluated exactly once and its result is reused for the return.
// Expanding it twice would re-run value.get(...) or the recursive call (and its
// callbacks) on the failure path.
#define RETURN_IF_NOT_SUCCESS(expr)                                                                \
    if (const simdjson::error_code status = (expr); Y_UNLIKELY(status != simdjson::SUCCESS)) {     \
        return status;                                                                             \
    }

    switch (value.type()) {
        case simdjson::dom::element_type::STRING: {
            std::string_view v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnString(v);
            break;
        }
        case simdjson::dom::element_type::BOOL: {
            bool v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnBoolean(v);
            break;
        }
        case simdjson::dom::element_type::INT64: {
            i64 v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnInteger(v);
            break;
        }
        case simdjson::dom::element_type::UINT64: {
            ui64 v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnUInteger(v);
            break;
        }
        case simdjson::dom::element_type::DOUBLE: {
            double v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            callbacks.OnDouble(v);
            break;
        }
        case simdjson::dom::element_type::NULL_VALUE:
            callbacks.OnNull();
            break;
        case simdjson::dom::element_type::ARRAY: {
            callbacks.OnOpenArray();

            simdjson::dom::array v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            for (const auto& item : v) {
                RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item, callbacks));
            }

            callbacks.OnCloseArray();
            break;
        }
        case simdjson::dom::element_type::OBJECT: {
            callbacks.OnOpenMap();

            simdjson::dom::object v;
            RETURN_IF_NOT_SUCCESS(value.get(v));
            for (const auto& item : v) {
                callbacks.OnMapKey(item.key);
                RETURN_IF_NOT_SUCCESS(SimdJsonToJsonIndex(item.value, callbacks));
            }

            callbacks.OnCloseMap();
            break;
        }
    }
    return simdjson::SUCCESS;
#undef RETURN_IF_NOT_SUCCESS
}
554619
}
555620

556621
// Parses `json` with the simdjson DOM parser and serializes the document to binary JSON.
// Returns Nothing() when parsing fails or when walking the parsed document reports an error.
TMaybe<TBinaryJson> SerializeToBinaryJsonImpl(const TStringBuf json) {
    simdjson::dom::parser domParser;
    auto parsed = domParser.parse(json);
    if (parsed.error() != simdjson::SUCCESS) {
        return Nothing();
    }

    TBinaryJsonCallbacks callbacks(/* throwException */ false);
    const simdjson::error_code walkStatus = SimdJsonToJsonIndex(parsed.value(), callbacks);
    if (walkStatus != simdjson::SUCCESS) {
        return Nothing();
    }

    TBinaryJsonSerializer binarySerializer(std::move(callbacks).GetResult());
    return std::move(binarySerializer).Serialize();
}
566634

567635
TMaybe<TBinaryJson> SerializeToBinaryJson(const TStringBuf json) {

ydb/library/binary_json/ya.make

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ YQL_ABI_VERSION(
99
PEERDIR(
1010
library/cpp/json
1111
ydb/library/yql/minikql/dom
12+
contrib/libs/simdjson
1213
)
1314

1415
SRCS(
@@ -23,4 +24,5 @@ END()
2324

2425
RECURSE_FOR_TESTS(
2526
ut
27+
benchmark
2628
)

0 commit comments

Comments
 (0)