Skip to content

Commit b0d6515

Browse files
added test
1 parent 01ba776 commit b0d6515

File tree

2 files changed

+45
-107
lines changed

2 files changed

+45
-107
lines changed

ydb/core/health_check/health_check.cpp

Lines changed: 29 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -783,7 +783,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
783783
}
784784

785785
void Bootstrap() {
786-
Cerr << "iiiiii Bootstrap " << SelfId() << Endl;
787786
FilterDatabase = Request->Database;
788787
if (Request->Request.operation_params().has_operation_timeout()) {
789788
Timeout = GetDuration(Request->Request.operation_params().operation_timeout());
@@ -838,7 +837,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
838837
}
839838

840839
void Handle(TEvNodeWardenStorageConfig::TPtr ev) {
841-
Cerr << "aaaaa TEvNodeWardenStorageConfig" << Endl;
842840
NodeWardenStorageConfig->Set(std::move(ev));
843841
if (const NKikimrBlobStorage::TStorageConfig& config = *NodeWardenStorageConfig->Get()->Config; config.HasBlobStorageConfig()) {
844842
if (const auto& bsConfig = config.GetBlobStorageConfig(); bsConfig.HasServiceSet()) {
@@ -870,7 +868,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
870868
}
871869

872870
auto groupId = vDisk.GetVDiskID().GetGroupID();
873-
Cerr << "aaaaa TEvNodeWardenStorageConfig 2" << Endl;
874871
if (NeedWhiteboardInfoForGroup(groupId)) {
875872
BLOG_D("Requesting whiteboard for group " << groupId);
876873
RequestStorageNode(vDisk.GetVDiskLocation().GetNodeID());
@@ -1118,7 +1115,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
11181115
}
11191116

11201117
void RequestStorageNode(TNodeId nodeId) {
1121-
Cerr << "aaaaaaa RequestStorageNode " << nodeId << Endl;
11221118
if (StorageNodeIds.emplace(nodeId).second) {
11231119
RequestGenericNode(nodeId);
11241120
if (NodeVDiskState.count(nodeId) == 0) {
@@ -1159,16 +1155,14 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
11591155
}
11601156

11611157
void Handle(TEvPrivate::TEvRetryNodeWhiteboard::TPtr& ev) {
1162-
Cerr << "!!!!!!! Handle RetryNodeWhiteboard " << Endl;
11631158
auto eventId = ev->Get()->EventId;
11641159
auto nodeId = ev->Get()->NodeId;
11651160
switch (eventId) {
11661161
case TEvWhiteboard::EvSystemStateRequest:
1167-
Cerr << "!!!!!!! Handle RetryNodeWhiteboard EvSystemStateRequest " << Endl;
1168-
// if (!NodeSystemState[nodeId].IsDone()) {
1162+
if (!NodeSystemState[nodeId].IsDone()) {
11691163
NodeSystemState.erase(nodeId);
1170-
NodeSystemState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId);
1171-
// }
1164+
NodeSystemState[nodeId] = RequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId, {-1});
1165+
}
11721166
break;
11731167
case TEvWhiteboard::EvVDiskStateRequest:
11741168
if (!NodeVDiskState[nodeId].IsDone()) {
@@ -1196,7 +1190,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
11961190

11971191
template<typename TEvent>
11981192
bool RetryRequestNodeWhiteboard(TNodeId nodeId) {
1199-
Cerr << "!!!!!!! RetryRequestNodeWhiteboard " << nodeId << Endl;
12001193
if (NodeRetries[{nodeId, TEvent::EventType}]++ < MaxRetries) {
12011194
Schedule(RetryDelay, new TEvPrivate::TEvRetryNodeWhiteboard(nodeId, TEvent::EventType));
12021195
return true;
@@ -1205,7 +1198,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
12051198
}
12061199

12071200
void Handle(TEvents::TEvUndelivered::TPtr& ev) {
1208-
Cerr << "iiiiiiiii Undelivered " << Endl;
12091201
ui32 nodeId = ev.Get()->Cookie;
12101202
TString error = "Undelivered";
12111203
if (ev->Get()->SourceType == TEvWhiteboard::EvSystemStateRequest) {
@@ -1242,7 +1234,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
12421234
}
12431235

12441236
void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) {
1245-
Cerr << "iiiiiiiii Disconnected " << Endl;
12461237
ui32 nodeId = ev->Get()->NodeId;
12471238
TString error = "NodeDisconnected";
12481239
if (NodeSystemState.count(nodeId) && NodeSystemState[nodeId].Error(error)) {
@@ -1327,10 +1318,8 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
13271318
}
13281319

13291320
void HandleTimeout(TEvents::TEvWakeup::TPtr& ev) {
1330-
Cerr << "aaaaa HandleTimeout" << Endl;
13311321
switch (ev->Get()->Tag) {
13321322
case TimeoutBSC:
1333-
Cerr << "aaaaa TimeoutBSC" << Endl;
13341323
Span.Event("TimeoutBSC");
13351324
if (!HaveAllBSControllerInfo()) {
13361325
if (FilterDatabase.empty() || FilterDatabase == DomainPath) {
@@ -1651,32 +1640,14 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
16511640
RequestDone("TEvListTenantsResponse");
16521641
}
16531642

1654-
// void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) {
1655-
// TNodeId nodeId = ev.Get()->Cookie;
1656-
// Cerr << "iiiiiiii TEvSystemStateResponse: nodeId: " << nodeId << Endl;
1657-
// if (!NodeSystemState[nodeId].Done()) {
1658-
// auto& nodeSystemState(NodeSystemState[nodeId]);
1659-
// nodeSystemState.Set(std::move(ev));
1660-
// for (NKikimrWhiteboard::TSystemStateInfo& state : *nodeSystemState->Record.MutableSystemStateInfo()) {
1661-
// state.set_nodeid(nodeId);
1662-
// MergedNodeSystemState[nodeId] = &state;
1663-
// }
1664-
// }
1665-
// RequestDone("TEvSystemStateResponse");
1666-
// }
1667-
16681643
void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) {
16691644
TNodeId nodeId = ev.Get()->Cookie;
1670-
Cerr << "iiiiiiii TEvSystemStateResponse: nodeId: " << nodeId << Endl;
1671-
16721645
auto& nodeSystemState(NodeSystemState[nodeId]);
16731646
nodeSystemState.Set(std::move(ev));
16741647
for (NKikimrWhiteboard::TSystemStateInfo& state : *nodeSystemState->Record.MutableSystemStateInfo()) {
1675-
Cerr << "iiiiiiii Fill " << Endl;
16761648
state.set_nodeid(nodeId);
16771649
MergedNodeSystemState[nodeId] = &state;
16781650
}
1679-
16801651
RequestDone("TEvSystemStateResponse");
16811652
}
16821653

@@ -1928,12 +1899,10 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
19281899
rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN);
19291900
}
19301901

1931-
Cerr << "iiiiiiii FillComputeNodeStatus: nodeId: " << nodeId << Endl;
19321902
auto itNodeSystemState = MergedNodeSystemState.find(nodeId);
19331903
if (itNodeSystemState != MergedNodeSystemState.end()) {
19341904
const NKikimrWhiteboard::TSystemStateInfo& nodeSystemState(*itNodeSystemState->second);
19351905

1936-
Cerr << "iiiiiiii poolstats: " << nodeSystemState.poolstats_size() << Endl;
19371906
for (const auto& poolStat : nodeSystemState.poolstats()) {
19381907
TSelfCheckContext poolContext(&context, "COMPUTE_POOL");
19391908
poolContext.Location.mutable_compute()->mutable_pool()->set_name(poolStat.name());
@@ -1982,7 +1951,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
19821951
}
19831952
}
19841953
}
1985-
Cerr << "iiiiiiii nodeSystemState: 2 " << Endl;
19861954
} else {
19871955
// context.ReportStatus(Ydb::Monitoring::StatusFlag::RED,
19881956
// TStringBuilder() << "Compute node is not available",
@@ -2346,61 +2314,45 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
23462314

23472315
void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) {
23482316
TNodeId nodeId = ev.Get()->Cookie;
2349-
if (!NodeVDiskState.count(nodeId)) {
2350-
auto& nodeVDiskState(NodeVDiskState[nodeId]);
2351-
nodeVDiskState.Set(std::move(ev));
2352-
for (NKikimrWhiteboard::TVDiskStateInfo& state : *nodeVDiskState->Record.MutableVDiskStateInfo()) {
2353-
state.set_nodeid(nodeId);
2354-
auto id = GetVDiskId(state.vdiskid());
2355-
MergedVDiskState[id] = &state;
2356-
}
2357-
}
2358-
2359-
TString error = "NodeDisconnected";
2360-
if (NodeSystemState.count(nodeId) && NodeSystemState[nodeId].Error(error)) {
2361-
if (!RetryRequestNodeWhiteboard<TEvWhiteboard::TEvSystemStateRequest>(nodeId)) {
2362-
Cerr << "iiiiiii Retry" << Endl;
2363-
RequestDone("node disconnected with TEvSystemStateRequest");
2364-
UnavailableComputeNodes.insert(nodeId);
2365-
}
2317+
auto& nodeVDiskState(NodeVDiskState[nodeId]);
2318+
nodeVDiskState.Set(std::move(ev));
2319+
for (NKikimrWhiteboard::TVDiskStateInfo& state : *nodeVDiskState->Record.MutableVDiskStateInfo()) {
2320+
state.set_nodeid(nodeId);
2321+
auto id = GetVDiskId(state.vdiskid());
2322+
MergedVDiskState[id] = &state;
23662323
}
2367-
23682324
RequestDone("TEvVDiskStateResponse");
23692325
}
23702326

23712327
void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) {
23722328
TNodeId nodeId = ev.Get()->Cookie;
2373-
if (!NodePDiskState.count(nodeId)) {
2374-
auto& nodePDiskState(NodePDiskState[nodeId]);
2375-
nodePDiskState.Set(std::move(ev));
2376-
for (NKikimrWhiteboard::TPDiskStateInfo& state : *nodePDiskState->Record.MutablePDiskStateInfo()) {
2377-
state.set_nodeid(nodeId);
2378-
auto id = GetPDiskId(state);
2379-
MergedPDiskState[id] = &state;
2380-
}
2329+
auto& nodePDiskState(NodePDiskState[nodeId]);
2330+
nodePDiskState.Set(std::move(ev));
2331+
for (NKikimrWhiteboard::TPDiskStateInfo& state : *nodePDiskState->Record.MutablePDiskStateInfo()) {
2332+
state.set_nodeid(nodeId);
2333+
auto id = GetPDiskId(state);
2334+
MergedPDiskState[id] = &state;
23812335
}
23822336
RequestDone("TEvPDiskStateResponse");
23832337
}
23842338

23852339
void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) {
23862340
ui64 nodeId = ev.Get()->Cookie;
2387-
if (!NodeBSGroupState.count(nodeId)) {
2388-
auto& nodeBSGroupState(NodeBSGroupState[nodeId]);
2389-
nodeBSGroupState.Set(std::move(ev));
2390-
for (NKikimrWhiteboard::TBSGroupStateInfo& state : *nodeBSGroupState->Record.MutableBSGroupStateInfo()) {
2391-
state.set_nodeid(nodeId);
2392-
TString storagePoolName = state.storagepoolname();
2393-
TGroupID groupId(state.groupid());
2394-
const NKikimrWhiteboard::TBSGroupStateInfo*& current(MergedBSGroupState[state.groupid()]);
2395-
if (current == nullptr || current->GetGroupGeneration() < state.GetGroupGeneration()) {
2396-
current = &state;
2397-
}
2398-
if (storagePoolName.empty() && groupId.ConfigurationType() != EGroupConfigurationType::Static) {
2399-
continue;
2400-
}
2401-
StoragePoolStateByName[storagePoolName].Groups.emplace(state.groupid());
2402-
StoragePoolStateByName[storagePoolName].Name = storagePoolName;
2341+
auto& nodeBSGroupState(NodeBSGroupState[nodeId]);
2342+
nodeBSGroupState.Set(std::move(ev));
2343+
for (NKikimrWhiteboard::TBSGroupStateInfo& state : *nodeBSGroupState->Record.MutableBSGroupStateInfo()) {
2344+
state.set_nodeid(nodeId);
2345+
TString storagePoolName = state.storagepoolname();
2346+
TGroupID groupId(state.groupid());
2347+
const NKikimrWhiteboard::TBSGroupStateInfo*& current(MergedBSGroupState[state.groupid()]);
2348+
if (current == nullptr || current->GetGroupGeneration() < state.GetGroupGeneration()) {
2349+
current = &state;
2350+
}
2351+
if (storagePoolName.empty() && groupId.ConfigurationType() != EGroupConfigurationType::Static) {
2352+
continue;
24032353
}
2354+
StoragePoolStateByName[storagePoolName].Groups.emplace(state.groupid());
2355+
StoragePoolStateByName[storagePoolName].Name = storagePoolName;
24042356
}
24052357
RequestDone("TEvBSGroupStateResponse");
24062358
}

ydb/core/health_check/health_check_ut.cpp

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2354,7 +2354,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
23542354
UNIT_ASSERT(pdiskIssueFoundInResult);
23552355
}
23562356

2357-
Y_UNIT_TEST(TestWhiteboardResponseOnSameNode) {
2357+
Y_UNIT_TEST(TestSystemStateRetriesAfterReceivingResponse) {
23582358
TPortManager tp;
23592359
ui16 port = tp.GetPort(2134);
23602360
ui16 grpcPort = tp.GetPort(2135);
@@ -2371,47 +2371,33 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
23712371
TActorId sender = runtime.AllocateEdgeActor();
23722372
TAutoPtr<IEventHandle> handle;
23732373

2374-
std::optional<TNodeId> nodeId;
2374+
std::optional<TActorId> targetActor;
23752375
auto observerFunc = [&](TAutoPtr<IEventHandle>& ev) {
23762376
switch (ev->GetTypeRewrite()) {
2377-
case TEvWhiteboard::EvVDiskStateResponse: {
2378-
auto* msg = ev->Release<TEvWhiteboard::TEvVDiskStateResponse>().Release();
2379-
msg->Record.ClearVDiskStateInfo(); // whiteboard doesn't have any update
2380-
ev.Reset(new IEventHandle(ev->Recipient, ev->Sender, msg, ev->Flags, *nodeId));
2381-
break;
2382-
}
23832377
case TEvWhiteboard::EvSystemStateResponse: {
2384-
if (!nodeId) {
2385-
nodeId = ev->Cookie;
2386-
} else {
2387-
auto* msg = ev->Release<TEvWhiteboard::TEvSystemStateResponse>().Release();
2388-
msg->Record.ClearSystemStateInfo(); // whiteboard doesn't have any update
2389-
ev.Reset(new IEventHandle(ev->Recipient, ev->Sender, msg, ev->Flags, *nodeId));
2378+
if (ev->Cookie == 1) {
2379+
if (!targetActor) {
2380+
targetActor = ev->Recipient;
2381+
runtime.Send(ev.Release());
2382+
runtime.Send(new IEventHandle(
2383+
*targetActor,
2384+
sender,
2385+
new NHealthCheck::TEvPrivate::TEvRetryNodeWhiteboard(1, TEvWhiteboard::TEvSystemStateRequest::EventType)
2386+
));
2387+
2388+
}
2389+
return TTestActorRuntime::EEventAction::DROP;
23902390
}
23912391
break;
23922392
}
23932393
}
2394-
23952394
return TTestActorRuntime::EEventAction::PROCESS;
23962395
};
23972396
runtime.SetObserverFunc(observerFunc);
2398-
2399-
2400-
2401-
if (!delayed) {
2402-
TDispatchOptions opts;
2403-
opts.FinalEvents.emplace_back([&delayed](IEventHandle&) {
2404-
return bool(delayed);
2405-
});
2406-
server->GetRuntime()->DispatchEvents(opts);
2407-
}
2408-
24092397
runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0));
2410-
Cerr << "iiiiii try 1" << Endl;
2411-
runtime.GrabEdgeEvent<TEvWhiteboard::TEvSystemStateResponse>(handle);
2412-
Cerr << "iiiiii try 2" << Endl;
2398+
24132399
auto result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle)->Result;
2414-
Cerr << result.ShortDebugString();
2400+
UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD);
24152401
}
24162402
}
24172403
}

0 commit comments

Comments
 (0)