Skip to content

Commit bbdb418

Browse files
adameatshmel1k
authored and committed
Reduce severity of some health check issues (#3067)
1 parent 6fbfd4a commit bbdb418

File tree

1 file changed

+5
-9
lines changed

1 file changed

+5
-9
lines changed

ydb/core/health_check/health_check.cpp

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1140,19 +1140,15 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
11401140
static void Check(TSelfCheckContext& context, const NKikimrWhiteboard::TSystemStateInfo::TPoolStats& poolStats) {
11411141
if (poolStats.name() == "System" || poolStats.name() == "IC" || poolStats.name() == "IO") {
11421142
if (poolStats.usage() >= 0.99) {
1143-
context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Pool usage over 99%", ETags::OverloadState);
1143+
context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Pool usage is over than 99%", ETags::OverloadState);
11441144
} else if (poolStats.usage() >= 0.95) {
1145-
context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Pool usage over 95%", ETags::OverloadState);
1146-
} else if (poolStats.usage() >= 0.90) {
1147-
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage over 90%", ETags::OverloadState);
1145+
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage is over than 95%", ETags::OverloadState);
11481146
} else {
11491147
context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN);
11501148
}
11511149
} else {
11521150
if (poolStats.usage() >= 0.99) {
1153-
context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Pool usage over 99%", ETags::OverloadState);
1154-
} else if (poolStats.usage() >= 0.95) {
1155-
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage over 95%", ETags::OverloadState);
1151+
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage is over than 99%", ETags::OverloadState);
11561152
} else {
11571153
context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN);
11581154
}
@@ -1222,7 +1218,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
12221218
break;
12231219
case TNodeTabletState::ETabletState::RestartsTooOften:
12241220
computeTabletStatus.set_state("RESTARTS_TOO_OFTEN");
1225-
tabletContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Tablets are restarting too often", ETags::TabletState);
1221+
tabletContext.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Tablets are restarting too often", ETags::TabletState);
12261222
break;
12271223
case TNodeTabletState::ETabletState::Dead:
12281224
computeTabletStatus.set_state("DEAD");
@@ -1261,7 +1257,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
12611257

12621258
TSelfCheckContext rrContext(&context, "NODE_UPTIME");
12631259
if (databaseState.NodeRestartsPerPeriod[nodeId] >= 30) {
1264-
rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Node is restarting too often", ETags::Uptime);
1260+
rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Node is restarting too often", ETags::Uptime);
12651261
} else if (databaseState.NodeRestartsPerPeriod[nodeId] >= 10) {
12661262
rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "The number of node restarts has increased", ETags::Uptime);
12671263
} else {

0 commit comments

Comments
 (0)