Skip to content

Commit be95526

Browse files
committed
schemeboard: pass describe-result as an opaque payload
Change type of `{TEvUpdate,TEvNotify}.DescribeSchemeResult` from transparent `TEvDescribeSchemeResult` to opaque `bytes` and support that throughout Populator, Replica, Subscriber actors. A properly typed TEvDescribeSchemeResult induces additional overhead to automatically serialize and deserialize this message when transferring it over the wire. This performance cost is usually either negligible or imperceptible. But in specific situations, particularly when rapidly updating partitioning information for tables with a huge number of shards, this overhead could lead to significant issues. Schemeboard replicas could get overloaded and become unresponsive to further requests. This is problematic, especially considering the schemeboard subsystem's critical role in servicing all databases within a cluster, making it a SPOF. The core realization is that the schemeboard components do not require the full content of a TEvDescribeSchemeResult message to operate efficiently. Instead, only a limited set of fields (path, path-id, version and info about subdomain/database) is required for processing. And a whole TEvDescribeSchemeResult could be passed through as an opaque payload. Type change from TEvDescribeSchemeResult to bytes without changing field number is a safe move. Actual value of the field remains unchanged at the wire protocol level. Thus, older implementations will interpret the payload as a TEvDescribeSchemeResult message and proceed with deserialization as usual. And newer implementations will recognize the data as a binary blob and will deserialize it explicitly only when necessary. KIKIMR-14948
1 parent 0451129 commit be95526

19 files changed

+557
-286
lines changed

ydb/core/protos/flat_tx_scheme.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ message TEvDescribeSchemeResult {
112112
optional string LastExistedPrefixPath = 7;
113113
optional fixed64 LastExistedPrefixPathId = 8;
114114
optional NKikimrSchemeOp.TPathDescription LastExistedPrefixDescription = 9;
115+
115116
optional fixed64 PathOwnerId = 10;
116117
}
117118

ydb/core/protos/scheme_board.proto

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import "ydb/core/protos/flat_tx_scheme.proto";
1+
import "ydb/core/scheme/protos/pathid.proto";
22

33
package NKikimrSchemeBoard;
44
option java_package = "ru.yandex.kikimr.proto";
@@ -13,22 +13,68 @@ message TEvHandshake {
1313
optional uint64 Generation = 2;
1414
}
1515

16-
// here and below
17-
// Owner is the tablet id of schemeshard witch holds the records
18-
// LocalPathId is a second part of TPathId
19-
// PathOwnerId is a first part of TPathId
16+
// Here and below.
17+
// Owner is the tablet id of schemeshard which holds the records.
18+
// (PathOwnerId, LocalPathId) constitute TPathId of the object.
2019

20+
// TEvUpdate.DescribeSchemeResultSerialized is a NKikimrScheme.TEvDescribeSchemeResult
21+
// in the form of opaque payload.
22+
// Originally, that field existed as a properly typed TEvDescribeSchemeResult message.
23+
// However, that induces additional overhead to serialize and deserialize this message
24+
// when transferring it over the wire.
25+
// This performance cost is usually either negligible or imperceptible.
26+
// But in specific situations, particularly when rapidly updating partitioning information
27+
// for tables with huge number of shards, this overhead could lead to significant issues.
28+
// Schemeboard replicas could get overloaded and become unresponsive to further requests.
29+
// This is problematic, especially considering the schemeboard subsystem's critical role
30+
// in servicing all databases within a cluster, making it a Single Point of Failure (SPOF).
31+
//
32+
// The core realization is that the schemeboard components do not require the full content of
33+
// a TEvDescribeSchemeResult message to operate efficiently. Instead, only a limited set of
34+
// fields (path, path-id, version and info about subdomain/database) is required for processing.
35+
// And a whole TEvDescribeSchemeResult could be passed through as an opaque payload.
36+
//
37+
// Type change from TEvDescribeSchemeResult to bytes without changing field number
38+
// is a safe move. Actual value of the field remains unchanged at the wire protocol level.
39+
// Thus, older implementations will interpret the payload as a TEvDescribeSchemeResult message
40+
// and proceed with deserialization as usual. And newer implementations will recognize the data
41+
// as a binary blob and will deserialize it explicitly only when necessary.
42+
//
43+
// - Path
44+
// - PathOwnerId, LocalPathId
45+
// - PathDirEntryPathVersion
46+
// - PathSubdomainPathId
47+
// - PathAbandonedTenantsSchemeShards
48+
// are taken from the original TEvDescribeSchemeResult (one way or another).
49+
//
2150
message TEvUpdate {
2251
optional uint64 Owner = 1;
2352
optional uint64 Generation = 2;
2453
optional TLocalPathIdRange DeletedLocalPathIds = 3;
25-
optional string Path = 4;
26-
optional uint64 LocalPathId = 5;
54+
55+
optional string Path = 4; // extracted from DescribeSchemeResult.Path
56+
optional uint64 LocalPathId = 5; // extracted from DescribeSchemeResult.PathId
57+
2758
optional bool IsDeletion = 6 [default = false];
28-
optional NKikimrScheme.TEvDescribeSchemeResult DescribeSchemeResult = 7;
59+
60+
optional bytes DescribeSchemeResultSerialized = 7;
61+
2962
optional bool NeedAck = 8 [default = false];
30-
optional uint64 PathOwnerId = 9;
63+
64+
optional uint64 PathOwnerId = 9; // extracted from DescribeSchemeResult.PathOwnerId, DescribeSchemeResult.PathDescription.Self.SchemeshardId in order of presence
65+
3166
optional TLocalPathIdRange MigratedLocalPathIds = 10;
67+
68+
// Explicit values extracted from DescribeSchemeResultSerialized
69+
70+
// DescribeSchemeResult.PathDescription.Self.PathVersion
71+
optional uint64 PathDirEntryPathVersion = 11;
72+
73+
// DescribeSchemeResult.PathDescription.DomainDescription.DomainKey
74+
optional NKikimrProto.TPathID PathSubdomainPathId = 13;
75+
76+
// DescribeSchemeResult.PathDescription.AbandonedTenantsSchemeShards
77+
repeated uint64 PathAbandonedTenantsSchemeShards = 14;
3278
}
3379

3480
message TEvUpdateAck {
@@ -65,16 +111,22 @@ message TEvUnsubscribe {
65111
optional uint64 LocalPathId = 3;
66112
}
67113

114+
// See comments for TEvUpdate.
68115
message TEvNotify {
69116
optional string Path = 1;
70117
// and/or
71118
optional uint64 PathOwnerId = 2;
72119
optional uint64 LocalPathId = 3;
73120
// common fields
74121
optional bool IsDeletion = 4 [default = false];
75-
optional NKikimrScheme.TEvDescribeSchemeResult DescribeSchemeResult = 5;
76-
optional uint64 Version = 6;
122+
123+
optional bytes DescribeSchemeResultSerialized = 5;
124+
125+
optional uint64 Version = 6; // same as TEvUpdate.PathDirEntryPathVersion
77126
optional bool Strong = 7 [default = false];
127+
128+
optional NKikimrProto.TPathID PathSubdomainPathId = 8;
129+
repeated uint64 PathAbandonedTenantsSchemeShards = 9;
78130
}
79131

80132
message TEvNotifyAck {

ydb/core/tx/scheme_board/cache_ut.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ class TCacheTest: public TTestWithSchemeshard {
3737
" Kind: \"pool-kind-1\" "
3838
"} "
3939
" Name: \"Root\" ");
40+
41+
// Context->SetLogPriority(NKikimrServices::SCHEME_BOARD_REPLICA, NLog::PRI_DEBUG);
42+
// Context->SetLogPriority(NKikimrServices::SCHEME_BOARD_SUBSCRIBER, NLog::PRI_DEBUG);
43+
// Context->SetLogPriority(NKikimrServices::TX_PROXY_SCHEME_CACHE, NLog::PRI_DEBUG);
44+
// Context->SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NLog::PRI_DEBUG);
4045
}
4146

4247
UNIT_TEST_SUITE(TCacheTest);
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#include "helpers.h"
2+
#include "opaque_path_description.h"
3+
#include "events.h"
4+
5+
namespace NKikimr {
6+
7+
using namespace NSchemeBoard;
8+
9+
namespace {
10+
11+
//NOTE: This function moves out DescribeSchemeResultSerialized from TEvUpdate message.
12+
// It cannot be used twice on the same message.
13+
TOpaquePathDescription MakeOpaquePathDescription(NKikimrSchemeBoard::TEvUpdate& record) {
14+
Y_ABORT_UNLESS(record.HasDescribeSchemeResultSerialized());
15+
16+
// Move out DescribeSchemeResult blob.
17+
// And mark field empty to prevent subsequent extraction attempts
18+
auto data = std::move(*record.MutableDescribeSchemeResultSerialized());
19+
record.ClearDescribeSchemeResultSerialized();
20+
21+
return TOpaquePathDescription{
22+
.DescribeSchemeResultSerialized = std::move(data),
23+
//NOTE: unsuccessful describe results cannot be here, by design
24+
.Status = NKikimrScheme::StatusSuccess,
25+
.PathId = TPathId(record.GetPathOwnerId(), record.GetLocalPathId()),
26+
.Path = record.GetPath(),
27+
.PathVersion = record.GetPathDirEntryPathVersion(),
28+
.SubdomainPathId = PathIdFromPathId(record.GetPathSubdomainPathId()),
29+
.PathAbandonedTenantsSchemeShards = TSet<ui64>(
30+
record.GetPathAbandonedTenantsSchemeShards().begin(),
31+
record.GetPathAbandonedTenantsSchemeShards().end()
32+
)
33+
};
34+
}
35+
36+
37+
} // anonymous namespace
38+
39+
// TSchemeBoardEvents::TEvUpdate
40+
//
41+
42+
TOpaquePathDescription TSchemeBoardEvents::TEvUpdate::ExtractPathDescription() {
43+
return MakeOpaquePathDescription(Record);
44+
}
45+
46+
// TSchemeBoardEvents::TEvUpdateBuilder
47+
//
48+
49+
TSchemeBoardEvents::TEvUpdateBuilder::TEvUpdateBuilder(const ui64 owner, const ui64 generation) {
50+
Record.SetOwner(owner);
51+
Record.SetGeneration(generation);
52+
}
53+
54+
TSchemeBoardEvents::TEvUpdateBuilder::TEvUpdateBuilder(const ui64 owner, const ui64 generation, const TPathId& pathId) {
55+
Record.SetOwner(owner);
56+
Record.SetGeneration(generation);
57+
Record.SetPathOwnerId(pathId.OwnerId);
58+
Record.SetLocalPathId(pathId.LocalPathId);
59+
Record.SetIsDeletion(true);
60+
}
61+
62+
TSchemeBoardEvents::TEvUpdateBuilder::TEvUpdateBuilder(
63+
const ui64 owner,
64+
const ui64 generation,
65+
const TOpaquePathDescription& pathDescription,
66+
const bool isDeletion
67+
) {
68+
Record.SetOwner(owner);
69+
Record.SetGeneration(generation);
70+
Record.SetIsDeletion(isDeletion);
71+
72+
Record.SetPath(pathDescription.Path);
73+
Record.SetPathOwnerId(pathDescription.PathId.OwnerId);
74+
Record.SetLocalPathId(pathDescription.PathId.LocalPathId);
75+
76+
Record.SetPathDirEntryPathVersion(pathDescription.PathVersion);
77+
PathIdFromPathId(pathDescription.SubdomainPathId, Record.MutablePathSubdomainPathId());
78+
79+
Record.MutablePathAbandonedTenantsSchemeShards()->Assign(
80+
pathDescription.PathAbandonedTenantsSchemeShards.begin(),
81+
pathDescription.PathAbandonedTenantsSchemeShards.end()
82+
);
83+
}
84+
85+
void TSchemeBoardEvents::TEvUpdateBuilder::SetDescribeSchemeResultSerialized(const TString& serialized) {
86+
Record.SetDescribeSchemeResultSerialized(serialized);
87+
}
88+
89+
void TSchemeBoardEvents::TEvUpdateBuilder::SetDescribeSchemeResultSerialized(TString&& serialized) {
90+
Record.SetDescribeSchemeResultSerialized(std::move(serialized));
91+
}
92+
93+
// TSchemeBoardEvents::TEvNotifyBuilder
94+
//
95+
96+
TSchemeBoardEvents::TEvNotifyBuilder::TEvNotifyBuilder(const TString& path, const bool isDeletion /*= false*/) {
97+
Record.SetPath(path);
98+
Record.SetIsDeletion(isDeletion);
99+
}
100+
101+
TSchemeBoardEvents::TEvNotifyBuilder::TEvNotifyBuilder(const TPathId& pathId, const bool isDeletion /*= false*/) {
102+
Record.SetPathOwnerId(pathId.OwnerId);
103+
Record.SetLocalPathId(pathId.LocalPathId);
104+
Record.SetIsDeletion(isDeletion);
105+
}
106+
107+
TSchemeBoardEvents::TEvNotifyBuilder::TEvNotifyBuilder(const TString& path, const TPathId& pathId, const bool isDeletion /*= false*/) {
108+
Record.SetPath(path);
109+
Record.SetPathOwnerId(pathId.OwnerId);
110+
Record.SetLocalPathId(pathId.LocalPathId);
111+
Record.SetIsDeletion(isDeletion);
112+
}
113+
114+
void TSchemeBoardEvents::TEvNotifyBuilder::SetPathDescription(const TOpaquePathDescription& pathDescription) {
115+
Record.SetDescribeSchemeResultSerialized(pathDescription.DescribeSchemeResultSerialized);
116+
PathIdFromPathId(pathDescription.SubdomainPathId, Record.MutablePathSubdomainPathId());
117+
Record.MutablePathAbandonedTenantsSchemeShards()->Assign(
118+
pathDescription.PathAbandonedTenantsSchemeShards.begin(),
119+
pathDescription.PathAbandonedTenantsSchemeShards.end()
120+
);
121+
}
122+
123+
} // NKikimr

0 commit comments

Comments
 (0)