Skip to content

Commit c652119

Browse files
YDB-1066 Added healthcheck RED issue for no pools in database (#2098)
1 parent a41d41d commit c652119

File tree

2 files changed

+126
-22
lines changed

2 files changed

+126
-22
lines changed

ydb/core/health_check/health_check.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1958,8 +1958,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
19581958

19591959
void FillStorage(TDatabaseState& databaseState, Ydb::Monitoring::StorageStatus& storageStatus, TSelfCheckContext context) {
19601960
if (databaseState.StoragePoolNames.empty()) {
1961-
// pointless in real life
1962-
// context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "There are no storage pools");
1961+
context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "There are no storage pools", ETags::StorageState);
19631962
} else {
19641963
for (const TString& poolName : databaseState.StoragePoolNames) {
19651964
auto itStoragePoolState = StoragePoolState.find(poolName);

ydb/core/health_check/health_check_ut.cpp

Lines changed: 125 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
146146
}
147147

148148
auto groupId = GROUP_START_ID;
149-
149+
150150
auto group = pbConfig->add_group();
151151
group->CopyFrom(groupSample);
152152
group->set_groupid(groupId);
@@ -155,7 +155,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
155155

156156
group->clear_vslotid();
157157
auto vslotId = VCARD_START_ID;
158-
158+
159159
for (auto status: vdiskStatuses) {
160160
auto vslot = pbConfig->add_vslot();
161161
vslot->CopyFrom(vslotSample);
@@ -192,7 +192,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
192192
}
193193

194194
auto groupId = GROUP_START_ID;
195-
195+
196196
auto group = pbConfig->add_group();
197197
group->CopyFrom(groupSample);
198198
group->set_groupid(groupId);
@@ -201,7 +201,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
201201

202202
group->clear_vslotid();
203203
auto vslotId = VCARD_START_ID;
204-
204+
205205
for (auto status: vdiskStatuses) {
206206
auto vslot = pbConfig->add_vslot();
207207
vslot->CopyFrom(vslotSample);
@@ -547,7 +547,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
547547
auto result = RequestHc(1, 100, false, true);
548548
CheckHcProtobufSizeIssue(result, Ydb::Monitoring::StatusFlag::RED, 1);
549549
}
550-
550+
551551
void ClearLoadAverage(TEvWhiteboard::TEvSystemStateResponse::TPtr* ev) {
552552
auto *systemStateInfo = (*ev)->Get()->Record.MutableSystemStateInfo();
553553
for (NKikimrWhiteboard::TSystemStateInfo &state : *systemStateInfo) {
@@ -618,7 +618,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
618618
(*ev)->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult);
619619
for (const auto &path : paths) {
620620
listTenantsResult.Addpaths(path);
621-
}
621+
}
622622
(*ev)->Get()->Record.MutableResponse()->mutable_operation()->mutable_result()->PackFrom(listTenantsResult);
623623
}
624624

@@ -632,13 +632,13 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
632632
sharedNodeStats->MutableNodeDomain()->SetSchemeShard(SHARED_DOMAIN_KEY.OwnerId);
633633
sharedNodeStats->MutableNodeDomain()->SetPathId(SHARED_DOMAIN_KEY.LocalPathId);
634634
}
635-
635+
636636
if (exclusiveDynNodeId) {
637637
auto *exclusiveNodeStats = record.MutableNodeStats()->Add();
638638
exclusiveNodeStats->SetNodeId(exclusiveDynNodeId);
639639
exclusiveNodeStats->MutableNodeDomain()->SetSchemeShard(SERVERLESS_DOMAIN_KEY.OwnerId);
640640
exclusiveNodeStats->MutableNodeDomain()->SetPathId(SERVERLESS_DOMAIN_KEY.LocalPathId);
641-
}
641+
}
642642
}
643643

644644
Y_UNIT_TEST(SpecificServerless) {
@@ -671,7 +671,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
671671
}
672672
case TEvTxProxySchemeCache::EvNavigateKeySetResult: {
673673
auto *x = reinterpret_cast<TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr*>(&ev);
674-
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeShared, runtime);
674+
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeShared, runtime);
675675
break;
676676
}
677677
case TEvHive::EvResponseHiveNodeStats: {
@@ -762,7 +762,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
762762
}
763763
case TEvTxProxySchemeCache::EvNavigateKeySetResult: {
764764
auto *x = reinterpret_cast<TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr*>(&ev);
765-
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeExclusive, runtime);
765+
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeExclusive, runtime);
766766
break;
767767
}
768768
case TEvHive::EvResponseHiveNodeStats: {
@@ -864,7 +864,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
864864
}
865865
case TEvTxProxySchemeCache::EvNavigateKeySetResult: {
866866
auto *x = reinterpret_cast<TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr*>(&ev);
867-
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeShared, runtime);
867+
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeShared, runtime);
868868
break;
869869
}
870870
case TEvHive::EvResponseHiveNodeStats: {
@@ -955,7 +955,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
955955
}
956956
case TEvTxProxySchemeCache::EvNavigateKeySetResult: {
957957
auto *x = reinterpret_cast<TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr*>(&ev);
958-
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeExclusive, runtime);
958+
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeExclusive, runtime);
959959
break;
960960
}
961961
case TEvHive::EvResponseHiveNodeStats: {
@@ -1000,7 +1000,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
10001000

10011001
Ctest << result.ShortDebugString();
10021002
UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD);
1003-
1003+
10041004
bool databaseFoundInResult = false;
10051005
for (const auto &database_status : result.database_status()) {
10061006
if (database_status.name() == "/Root/serverless") {
@@ -1018,7 +1018,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
10181018
}
10191019
UNIT_ASSERT(databaseFoundInResult);
10201020
}
1021-
1021+
10221022
Y_UNIT_TEST(ServerlessWhenTroublesWithSharedNodes) {
10231023
TPortManager tp;
10241024
ui16 port = tp.GetPort(2134);
@@ -1044,7 +1044,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
10441044
}
10451045
case TEvTxProxySchemeCache::EvNavigateKeySetResult: {
10461046
auto *x = reinterpret_cast<TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr*>(&ev);
1047-
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeShared, runtime);
1047+
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeShared, runtime);
10481048
break;
10491049
}
10501050
case TEvSchemeShard::EvDescribeSchemeResult: {
@@ -1073,7 +1073,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
10731073
return TTestActorRuntime::EEventAction::PROCESS;
10741074
};
10751075
runtime.SetObserverFunc(observerFunc);
1076-
1076+
10771077
TActorId sender = runtime.AllocateEdgeActor();
10781078
TAutoPtr<IEventHandle> handle;
10791079

@@ -1098,7 +1098,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
10981098
UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools().size(), 1);
10991099
UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].id(), SHARED_STORAGE_POOL_NAME);
11001100
}
1101-
1101+
11021102
Y_UNIT_TEST(ServerlessWithExclusiveNodesWhenTroublesWithSharedNodes) {
11031103
TPortManager tp;
11041104
ui16 port = tp.GetPort(2134);
@@ -1141,7 +1141,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
11411141
}
11421142
case TEvTxProxySchemeCache::EvNavigateKeySetResult: {
11431143
auto *x = reinterpret_cast<TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr*>(&ev);
1144-
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeExclusive, runtime);
1144+
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeExclusive, runtime);
11451145
break;
11461146
}
11471147
case TEvHive::EvResponseHiveNodeStats: {
@@ -1277,7 +1277,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
12771277
}
12781278
case TEvTxProxySchemeCache::EvNavigateKeySetResult: {
12791279
auto *x = reinterpret_cast<TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr*>(&ev);
1280-
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeExclusive, runtime);
1280+
ChangeNavigateKeyResultServerless(x, NKikimrSubDomains::EServerlessComputeResourcesModeExclusive, runtime);
12811281
break;
12821282
}
12831283
case TEvHive::EvResponseHiveNodeStats: {
@@ -1370,6 +1370,111 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
13701370
UNIT_ASSERT(sharedDatabaseFoundInResult);
13711371
UNIT_ASSERT(rootDatabaseFoundInResult);
13721372
}
1373-
}
13741373

1374+
Y_UNIT_TEST(NoStoragePools) {
// Sends a self-check request for "/Root/database" while an observer rewrites
// the responses the health check receives, then asserts that the overall
// result is EMERGENCY, the database and its storage section are RED with an
// empty pool list, and compute is GREEN with exactly one node.
1375+
TPortManager tp;
1376+
ui16 port = tp.GetPort(2134);
1377+
ui16 grpcPort = tp.GetPort(2135);
1378+
auto settings = TServerSettings(port)
1379+
.SetNodeCount(1)
1380+
.SetDynamicNodeCount(1)
1381+
.SetUseRealThreads(false)
1382+
.SetDomainName("Root");
1383+
TServer server(settings);
1384+
server.EnableGRpc(grpcPort);
1385+
TClient client(settings);
1386+
TTestActorRuntime& runtime = *server.GetRuntime();
1387+
// Set the static/dynamic node id bounds from the runtime's node ids, using
// the server's static and dynamic node counts as indices.
1388+
auto &dynamicNameserviceConfig = runtime.GetAppData().DynamicNameserviceConfig;
1389+
dynamicNameserviceConfig->MaxStaticNodeId = runtime.GetNodeId(server.StaticNodes() - 1);
1390+
dynamicNameserviceConfig->MinDynamicNodeId = runtime.GetNodeId(server.StaticNodes());
1391+
dynamicNameserviceConfig->MaxDynamicNodeId = runtime.GetNodeId(server.StaticNodes() + server.DynamicNodes() - 1);
1392+
// Node at index 1 -- presumably the single dynamic node, given one static and
// one dynamic node are configured above (TODO confirm).
1393+
ui32 dynNodeId = runtime.GetNodeId(1);
1394+
// Path id assigned to the "/Root/database" subdomain by the observer below.
1395+
const TPathId SUBDOMAIN_KEY = {7000000000, 1};
1396+
// Observer that patches selected event types in place; it always returns
// PROCESS, so every event is still passed on after being modified.
1397+
auto observerFunc = [&](TAutoPtr<IEventHandle>& ev) {
1398+
switch (ev->GetTypeRewrite()) {
1399+
case NConsole::TEvConsole::EvGetTenantStatusResponse: {
1400+
auto *x = reinterpret_cast<NConsole::TEvConsole::TEvGetTenantStatusResponse::TPtr*>(&ev);
1401+
ChangeGetTenantStatusResponse(x, "/Root/database");
1402+
break;
1403+
}
1404+
case TEvTxProxySchemeCache::EvNavigateKeySetResult: {
1405+
auto *x = reinterpret_cast<TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr*>(&ev);
// Only the first entry of the result set is inspected and patched.
1406+
TSchemeCacheNavigate::TEntry& entry((*x)->Get()->Request->ResultSet.front());
1407+
TString path = CanonizePath(entry.Path);
1408+
if (path == "/Root/database" || entry.TableId.PathId == SUBDOMAIN_KEY) {
// Mark the entry as a successfully resolved ext-subdomain keyed by
// SUBDOMAIN_KEY, with its hive taken from the first configured domain's
// default hive uid.
1409+
entry.Status = TSchemeCacheNavigate::EStatus::Ok;
1410+
entry.Kind = TSchemeCacheNavigate::EKind::KindExtSubdomain;
1411+
entry.Path = {"Root", "database"};
1412+
entry.DomainInfo = MakeIntrusive<TDomainInfo>(SUBDOMAIN_KEY, SUBDOMAIN_KEY);
1413+
auto domains = runtime.GetAppData().DomainsInfo;
1414+
auto domain = domains->Domains.begin()->second;
1415+
ui64 hiveId = domains->GetHive(domain->DefaultHiveUid);
1416+
entry.DomainInfo->Params.SetHive(hiveId);
1417+
}
1418+
break;
1419+
}
1420+
case TEvHive::EvResponseHiveNodeStats: {
1421+
auto *x = reinterpret_cast<TEvHive::TEvResponseHiveNodeStats::TPtr*>(&ev);
1422+
auto &record = (*x)->Get()->Record;
// Append a node-stats entry that places dynNodeId in SUBDOMAIN_KEY's domain.
1423+
auto *nodeStats = record.MutableNodeStats()->Add();
1424+
nodeStats->SetNodeId(dynNodeId);
1425+
nodeStats->MutableNodeDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId);
1426+
nodeStats->MutableNodeDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId);
1427+
break;
1428+
}
1429+
case TEvSchemeShard::EvDescribeSchemeResult: {
1430+
auto *x = reinterpret_cast<NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr*>(&ev);
1431+
auto record = (*x)->Get()->MutableRecord();
// For the database path only the status is forced to success; no storage
// pool is added to the description here.
1432+
if (record->path() == "/Root/database") {
1433+
record->set_status(NKikimrScheme::StatusSuccess);
1434+
// no pools
1435+
}
1436+
break;
1437+
}
1438+
case TEvBlobStorage::EvControllerConfigResponse: {
1439+
auto *x = reinterpret_cast<TEvBlobStorage::TEvControllerConfigResponse::TPtr*>(&ev);
// The helper is defined elsewhere in this file; it is called with a FULL
// group status and a single READY vdisk status.
1440+
TVector<NKikimrBlobStorage::EVDiskStatus> vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY };
1441+
AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses);
1442+
break;
1443+
}
1444+
case TEvWhiteboard::EvSystemStateResponse: {
1445+
auto *x = reinterpret_cast<TEvWhiteboard::TEvSystemStateResponse::TPtr*>(&ev);
1446+
ClearLoadAverage(x);
1447+
break;
1448+
}
1449+
}
1450+
1451+
return TTestActorRuntime::EEventAction::PROCESS;
1452+
};
1453+
runtime.SetObserverFunc(observerFunc);
1454+
1455+
TActorId sender = runtime.AllocateEdgeActor();
1456+
TAutoPtr<IEventHandle> handle;
1457+
// Send a verbose self-check request for "/Root/database" to the health check
// actor and take the result from the edge actor.
1458+
auto *request = new NHealthCheck::TEvSelfCheckRequest;
1459+
request->Request.set_return_verbose_status(true);
1460+
request->Database = "/Root/database";
1461+
runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0));
1462+
const auto result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle)->Result;
1463+
1464+
Ctest << result.ShortDebugString();
1465+
// Overall verdict: EMERGENCY, with exactly one database entry that is RED.
1466+
UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::EMERGENCY);
1467+
UNIT_ASSERT_VALUES_EQUAL(result.database_status_size(), 1);
1468+
const auto &database_status = result.database_status(0);
1469+
UNIT_ASSERT_VALUES_EQUAL(database_status.name(), "/Root/database");
1470+
UNIT_ASSERT_VALUES_EQUAL(database_status.overall(), Ydb::Monitoring::StatusFlag::RED);
1471+
// Compute must be GREEN and list only dynNodeId.
1472+
UNIT_ASSERT_VALUES_EQUAL(database_status.compute().overall(), Ydb::Monitoring::StatusFlag::GREEN);
1473+
UNIT_ASSERT_VALUES_EQUAL(database_status.compute().nodes().size(), 1);
1474+
UNIT_ASSERT_VALUES_EQUAL(database_status.compute().nodes()[0].id(), ToString(dynNodeId));
1475+
// Storage must be RED and report no pools.
1476+
UNIT_ASSERT_VALUES_EQUAL(database_status.storage().overall(), Ydb::Monitoring::StatusFlag::RED);
1477+
UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools().size(), 0);
1478+
}
1479+
}
13751480
}

0 commit comments

Comments
 (0)