Skip to content

Commit 64447f6

Browse files
authored
Merge 40bc2a3 into b530042
2 parents b530042 + 40bc2a3 commit 64447f6

File tree

7 files changed

+109
-22
lines changed

7 files changed

+109
-22
lines changed

ydb/core/mind/hive/hive_impl.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2494,19 +2494,19 @@ bool THive::StopSubActor(TSubActorId subActorId) {
24942494
}
24952495

24962496
bool THive::IsValidMetrics(const NKikimrTabletBase::TMetrics& metrics) {
2497-
return IsValidMetricsCPU(metrics) || IsValidMetricsMemory(metrics) || IsValidMetricsNetwork(metrics);
2497+
return IsValidMetricsCPU(metrics.GetCPU()) || IsValidMetricsMemory(metrics.GetMemory()) || IsValidMetricsNetwork(metrics.GetNetwork());
24982498
}
24992499

2500-
bool THive::IsValidMetricsCPU(const NKikimrTabletBase::TMetrics& metrics) {
2501-
return metrics.GetCPU() > 1'000/*1ms*/;
2500+
bool THive::IsValidMetricsCPU(ui64 cpu) {
2501+
return cpu > 1'000/*1ms*/;
25022502
}
25032503

2504-
bool THive::IsValidMetricsMemory(const NKikimrTabletBase::TMetrics& metrics) {
2505-
return metrics.GetMemory() > 128'000/*128KB*/;
2504+
bool THive::IsValidMetricsMemory(ui64 memory) {
2505+
return memory > 128'000/*128KB*/;
25062506
}
25072507

2508-
bool THive::IsValidMetricsNetwork(const NKikimrTabletBase::TMetrics& metrics) {
2509-
return metrics.GetNetwork() > 1024/*1KBps*/;
2508+
bool THive::IsValidMetricsNetwork(ui64 network) {
2509+
return network > 1024/*1KBps*/;
25102510
}
25112511

25122512
TString THive::DebugDomainsActiveNodes() const {

ydb/core/mind/hive/hive_impl.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -667,9 +667,9 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
667667
const TVector<i64>& GetTabletTypeAllowedMetricIds(TTabletTypes::EType type) const;
668668
static const TVector<i64>& GetDefaultAllowedMetricIdsForType(TTabletTypes::EType type);
669669
static bool IsValidMetrics(const NKikimrTabletBase::TMetrics& metrics);
670-
static bool IsValidMetricsCPU(const NKikimrTabletBase::TMetrics& metrics);
671-
static bool IsValidMetricsMemory(const NKikimrTabletBase::TMetrics& metrics);
672-
static bool IsValidMetricsNetwork(const NKikimrTabletBase::TMetrics& metrics);
670+
static bool IsValidMetricsCPU(ui64 cpu);
671+
static bool IsValidMetricsMemory(ui64 memory);
672+
static bool IsValidMetricsNetwork(ui64 network);
673673
void UpdateTotalResourceValues(
674674
const TNodeInfo* node,
675675
const TTabletInfo* tablet,

ydb/core/mind/hive/hive_ut.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4594,6 +4594,86 @@ Y_UNIT_TEST_SUITE(THiveTest) {
45944594
UNIT_ASSERT_LE(movedToFirstNode, TABLETS_PER_NODE / 2);
45954595
}
45964596

4597+
Y_UNIT_TEST(TestHiveNoBalancingWithLowResourceUsage) {
4598+
static constexpr ui64 NUM_NODES = 5;
4599+
static constexpr ui64 NUM_TABLETS = 100;
4600+
TTestBasicRuntime runtime(NUM_NODES, false);
4601+
Setup(runtime, true, 1, [](TAppPrepare& app) {
4602+
app.HiveConfig.SetTabletKickCooldownPeriod(0);
4603+
app.HiveConfig.SetResourceChangeReactionPeriod(0);
4604+
app.HiveConfig.SetMetricsWindowSize(1);
4605+
});
4606+
const int nodeBase = runtime.GetNodeId(0);
4607+
TActorId senderA = runtime.AllocateEdgeActor();
4608+
const ui64 hiveTablet = MakeDefaultHiveID();
4609+
const ui64 testerTablet = MakeTabletID(false, 1);
4610+
4611+
auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> std::array<std::vector<ui64>, NUM_NODES> {
4612+
std::array<std::vector<ui64>, NUM_NODES> nodeTablets = {};
4613+
{
4614+
runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo());
4615+
TAutoPtr<IEventHandle> handle;
4616+
TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle);
4617+
for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) {
4618+
UNIT_ASSERT_C(((int)tablet.GetNodeID() - nodeBase >= 0) && (tablet.GetNodeID() - nodeBase < NUM_NODES),
4619+
"nodeId# " << tablet.GetNodeID() << " nodeBase# " << nodeBase);
4620+
nodeTablets[tablet.GetNodeID() - nodeBase].push_back(tablet.GetTabletID());
4621+
}
4622+
}
4623+
return nodeTablets;
4624+
};
4625+
4626+
CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive);
4627+
4628+
// wait for creation of nodes
4629+
{
4630+
TDispatchOptions options;
4631+
options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES);
4632+
runtime.DispatchEvents(options);
4633+
}
4634+
4635+
TTabletTypes::EType tabletType = TTabletTypes::Dummy;
4636+
std::vector<ui64> tablets;
4637+
tablets.reserve(NUM_TABLETS);
4638+
for (size_t i = 0; i < NUM_TABLETS; ++i) {
4639+
THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS));
4640+
ev->Record.SetObjectId(i);
4641+
ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true);
4642+
MakeSureTabletIsUp(runtime, tabletId, 0);
4643+
tablets.push_back(tabletId);
4644+
}
4645+
4646+
auto initialDistribution = getDistribution();
4647+
4648+
// report small metrics for some tablets
4649+
auto rand = CreateDeterministicRandomProvider(777);
4650+
for (auto tablet : tablets) {
4651+
THolder<TEvHive::TEvTabletMetrics> metrics = MakeHolder<TEvHive::TEvTabletMetrics>();
4652+
NKikimrHive::TTabletMetrics* metric = metrics->Record.AddTabletMetrics();
4653+
metric->SetTabletID(tablet);
4654+
if (rand->GenRand() % 2) {
4655+
metric->MutableResourceUsage()->SetCPU(1001); // just above the 1'000 (1ms) cutoff checked by THive::IsValidMetricsCPU
4656+
metric->MutableResourceUsage()->SetMemory(150'000); // above the 128'000 (128KB) cutoff checked by THive::IsValidMetricsMemory
4657+
} else {
4658+
metric->MutableResourceUsage()->SetCPU(999); // just below the CPU cutoff, so not counted as a valid metric
4659+
metric->MutableResourceUsage()->SetMemory(100'000); // below the memory cutoff, so not counted as a valid metric
4660+
}
4661+
4662+
runtime.SendToPipe(hiveTablet, senderA, metrics.Release());
4663+
}
4664+
4665+
{
4666+
TDispatchOptions options;
4667+
options.FinalEvents.emplace_back(NHive::TEvPrivate::EvBalancerOut);
4668+
runtime.DispatchEvents(options, TDuration::Seconds(10));
4669+
}
4670+
4671+
// Check that balancer moved no tablets
4672+
auto newDistribution = getDistribution();
4673+
4674+
UNIT_ASSERT_EQUAL(initialDistribution, newDistribution);
4675+
}
4676+
45974677
Y_UNIT_TEST(TestHiveBalancerWithImmovableTablets) {
45984678
static constexpr ui64 TABLETS_PER_NODE = 10;
45994679
TTestBasicRuntime runtime(3, false);

ydb/core/mind/hive/tablet_info.cpp

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -426,14 +426,17 @@ TResourceRawValues TTabletInfo::GetResourceMaximumValues() const {
426426
}
427427
}
428428

429-
i64 TTabletInfo::GetCounterValue(const NKikimrTabletBase::TMetrics& metrics, const TVector<i64>& allowedMetricIds) {
430-
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU) && THive::IsValidMetricsCPU(metrics)) {
429+
i64 TTabletInfo::GetCounterValue(const TTabletMetricsAggregates& metrics, const TVector<i64>& allowedMetricIds) {
430+
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU)
431+
&& THive::IsValidMetricsCPU(metrics.MaximumCPU.GetAllTimeMaximum())) {
431432
return 0;
432433
}
433-
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory) && THive::IsValidMetricsMemory(metrics)) {
434+
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory)
435+
// Pass the raw all-time maximum, as the CPU and Network checks do; comparing it with 0 first
// would hand the validator a bool (0 or 1), which can never exceed its 128'000 threshold.
&& THive::IsValidMetricsMemory(metrics.MaximumMemory.GetAllTimeMaximum())) {
434436
return 0;
435437
}
436-
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network) && THive::IsValidMetricsNetwork(metrics)) {
438+
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network)
439+
&& THive::IsValidMetricsNetwork(metrics.MaximumNetwork.GetAllTimeMaximum())) {
437440
return 0;
438441
}
439442
return 1;
@@ -445,13 +448,13 @@ void TTabletInfo::FilterRawValues(TResourceRawValues& values) const {
445448
if (metrics.GetCounter() == 0) {
446449
std::get<NMetrics::EResource::Counter>(values) = 0;
447450
}
448-
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU) || !THive::IsValidMetricsCPU(metrics)) {
451+
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU) || !THive::IsValidMetricsCPU(metrics.GetCPU())) {
449452
std::get<NMetrics::EResource::CPU>(values) = 0;
450453
}
451-
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory) || !THive::IsValidMetricsMemory(metrics)) {
454+
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory) || !THive::IsValidMetricsMemory(metrics.GetMemory())) {
452455
std::get<NMetrics::EResource::Memory>(values) = 0;
453456
}
454-
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network) || !THive::IsValidMetricsNetwork(metrics)) {
457+
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network) || !THive::IsValidMetricsNetwork(metrics.GetNetwork())) {
455458
std::get<NMetrics::EResource::Network>(values) = 0;
456459
}
457460
}
@@ -462,19 +465,19 @@ void TTabletInfo::FilterRawValues(TResourceNormalizedValues& values) const {
462465
if (metrics.GetCounter() == 0) {
463466
std::get<NMetrics::EResource::Counter>(values) = 0;
464467
}
465-
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU) || !THive::IsValidMetricsCPU(metrics)) {
468+
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU) || !THive::IsValidMetricsCPU(metrics.GetCPU())) {
466469
std::get<NMetrics::EResource::CPU>(values) = 0;
467470
}
468-
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory) || !THive::IsValidMetricsMemory(metrics)) {
471+
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory) || !THive::IsValidMetricsMemory(metrics.GetMemory())) {
469472
std::get<NMetrics::EResource::Memory>(values) = 0;
470473
}
471-
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network) || !THive::IsValidMetricsNetwork(metrics)) {
474+
if (!HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network) || !THive::IsValidMetricsNetwork(metrics.GetNetwork())) {
472475
std::get<NMetrics::EResource::Network>(values) = 0;
473476
}
474477
}
475478

476479
void TTabletInfo::ActualizeCounter() {
477-
auto value = GetCounterValue(ResourceValues, GetTabletAllowedMetricIds());
480+
auto value = GetCounterValue(ResourceMetricsAggregates, GetTabletAllowedMetricIds());
478481
ResourceValues.SetCounter(value);
479482
}
480483

ydb/core/mind/hive/tablet_info.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ struct TTabletInfo {
233233
void UpdateResourceUsage(const NKikimrTabletBase::TMetrics& metrics);
234234
TResourceRawValues GetResourceCurrentValues() const;
235235
TResourceRawValues GetResourceMaximumValues() const;
236-
static i64 GetCounterValue(const NKikimrTabletBase::TMetrics& metrics, const TVector<i64>& allowedMetricIds);
236+
static i64 GetCounterValue(const TTabletMetricsAggregates& metrics, const TVector<i64>& allowedMetricIds);
237237
void FilterRawValues(TResourceRawValues& values) const;
238238
void FilterRawValues(TResourceNormalizedValues& values) const;
239239
void ActualizeCounter();

ydb/core/protos/metrics.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ package NKikimrMetricsProto;
33
message TMaximumValueUI64 {
44
optional uint64 LastBucketStartTime = 1;
55
repeated uint64 Values = 2;
6+
// Largest value recorded so far, independent of the bucket window.
// Updated in TMaximumValueVariableWindowUI64::SetValue, which deliberately skips the
// initial value (it only updates once a non-zero maximum is already known).
optional uint64 AllTimeMaximum = 3;
67
}

ydb/core/util/metrics.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,9 @@ class TMaximumValueVariableWindowUI64 : public NKikimrMetricsProto::TMaximumValu
395395
using TProto = NKikimrMetricsProto::TMaximumValueUI64;
396396

397397
void SetValue(TType value, TInstant now = TInstant::Now()) {
398+
if (TProto::GetAllTimeMaximum() > 0 || MaximumValue > 0) { // ignoring initial value
399+
TProto::SetAllTimeMaximum(std::max(value, TProto::GetAllTimeMaximum()));
400+
}
398401
TDuration elapsedCurrentBucket = now - TInstant::MilliSeconds(TProto::GetLastBucketStartTime());
399402
if (TProto::ValuesSize() == 0 || elapsedCurrentBucket >= BucketDuration) {
400403
size_t bucketsPassed = 0;

0 commit comments

Comments
 (0)