Skip to content

Commit ec8226d

Browse files
authored
Merge 6339986 into 8d189a1
2 parents 8d189a1 + 6339986 commit ec8226d

File tree

5 files changed

+107
-7
lines changed

5 files changed

+107
-7
lines changed

ydb/core/mind/hive/hive_ut.cpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4594,6 +4594,96 @@ Y_UNIT_TEST_SUITE(THiveTest) {
45944594
UNIT_ASSERT_LE(movedToFirstNode, TABLETS_PER_NODE / 2);
45954595
}
45964596

4597+
// Verifies that Hive does not move tablets when every tablet reports only small
// CPU/memory usage: the per-node tablet distribution must be the same before and
// after the metrics are delivered, and the Counter metric in the domain stats
// must be zero.
Y_UNIT_TEST(TestHiveNoBalancingWithLowResourceUsage) {
4598+
static constexpr ui64 NUM_NODES = 5;
4599+
static constexpr ui64 NUM_TABLETS = 100;
4600+
TTestBasicRuntime runtime(NUM_NODES, false);
4601+
// NOTE(review): the zero periods and window size of 1 presumably make Hive react
// to reported metrics immediately - confirm against the HiveConfig definition.
Setup(runtime, true, 1, [](TAppPrepare& app) {
4602+
app.HiveConfig.SetTabletKickCooldownPeriod(0);
4603+
app.HiveConfig.SetResourceChangeReactionPeriod(0);
4604+
app.HiveConfig.SetMetricsWindowSize(1);
4605+
});
4606+
// Node id of the first node; used to turn node ids into 0-based array indexes.
const int nodeBase = runtime.GetNodeId(0);
4607+
TActorId senderA = runtime.AllocateEdgeActor();
4608+
const ui64 hiveTablet = MakeDefaultHiveID();
4609+
const ui64 testerTablet = MakeTabletID(false, 1);
4610+
4611+
// Requests the tablet list from Hive and groups the tablet ids by the index
// (node id minus nodeBase) of the node each tablet is reported on.
auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> std::array<std::vector<ui64>, NUM_NODES> {
4612+
std::array<std::vector<ui64>, NUM_NODES> nodeTablets = {};
4613+
{
4614+
runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo());
4615+
TAutoPtr<IEventHandle> handle;
4616+
TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle);
4617+
for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) {
4618+
// Guards the array index below: the reported node must map into [0, NUM_NODES).
UNIT_ASSERT_C(((int)tablet.GetNodeID() - nodeBase >= 0) && (tablet.GetNodeID() - nodeBase < NUM_NODES),
4619+
"nodeId# " << tablet.GetNodeID() << " nodeBase# " << nodeBase);
4620+
nodeTablets[tablet.GetNodeID() - nodeBase].push_back(tablet.GetTabletID());
4621+
}
4622+
}
4623+
return nodeTablets;
4624+
};
4625+
4626+
CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive);
4627+
4628+
// wait for creation of nodes
4629+
{
4630+
TDispatchOptions options;
4631+
options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES);
4632+
runtime.DispatchEvents(options);
4633+
}
4634+
4635+
// Create NUM_TABLETS Dummy tablets, each with its own owner index and object id,
// and make sure each one is up before creating the next.
TTabletTypes::EType tabletType = TTabletTypes::Dummy;
4636+
std::vector<ui64> tablets;
4637+
tablets.reserve(NUM_TABLETS);
4638+
for (size_t i = 0; i < NUM_TABLETS; ++i) {
4639+
THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS));
4640+
ev->Record.SetObjectId(i);
4641+
ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true);
4642+
MakeSureTabletIsUp(runtime, tabletId, 0);
4643+
tablets.push_back(tabletId);
4644+
}
4645+
4646+
// Snapshot of the tablet placement taken before any metrics are reported.
auto initialDistribution = getDistribution();
4647+
4648+
// report small metrics for some tablets
4649+
// Seeded provider (seed 777); presumably keeps the split between the two metric
// sets below reproducible across runs.
auto rand = CreateDeterministicRandomProvider(777);
4650+
for (auto tablet : tablets) {
4651+
THolder<TEvHive::TEvTabletMetrics> metrics = MakeHolder<TEvHive::TEvTabletMetrics>();
4652+
NKikimrHive::TTabletMetrics* metric = metrics->Record.AddTabletMetrics();
4653+
metric->SetTabletID(tablet);
4654+
// Every tablet gets one of two small CPU/memory readings, chosen by the provider.
if (rand->GenRand() % 2) {
4655+
metric->MutableResourceUsage()->SetCPU(1001); // 1% core
4656+
metric->MutableResourceUsage()->SetMemory(150'000); // 150kb
4657+
} else {
4658+
metric->MutableResourceUsage()->SetCPU(999);
4659+
metric->MutableResourceUsage()->SetMemory(100'000);
4660+
}
4661+
4662+
// Release() hands ownership of the event to SendToPipe.
runtime.SendToPipe(hiveTablet, senderA, metrics.Release());
4663+
}
4664+
4665+
// Dispatch events until a balancer-out event is seen, with a 10 second limit.
// NOTE(review): when no balancer run is triggered this presumably ends on the
// limit rather than on the event - confirm.
{
4666+
TDispatchOptions options;
4667+
options.FinalEvents.emplace_back(NHive::TEvPrivate::EvBalancerOut);
4668+
runtime.DispatchEvents(options, TDuration::Seconds(10));
4669+
}
4670+
4671+
// Check that balancer moved no tablets
4672+
auto newDistribution = getDistribution();
4673+
4674+
UNIT_ASSERT_EQUAL(initialDistribution, newDistribution);
4675+
4676+
// Request domain stats with metrics included and check that the Counter metric
// of the first domain entry is zero.
// NOTE(review): GetDomainStats(0) assumes the response holds at least one entry.
{
4677+
auto request = std::make_unique<TEvHive::TEvRequestHiveDomainStats>();
4678+
request->Record.SetReturnMetrics(true);
4679+
runtime.SendToPipe(hiveTablet, senderA, request.release());
4680+
TAutoPtr<IEventHandle> handle;
4681+
TEvHive::TEvResponseHiveDomainStats* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveDomainStats>(handle);
4682+
ui64 totalCounter = response->Record.GetDomainStats(0).GetMetrics().GetCounter();
4683+
UNIT_ASSERT_VALUES_EQUAL(totalCounter, 0);
4684+
}
4685+
}
4686+
45974687
Y_UNIT_TEST(TestHiveBalancerWithImmovableTablets) {
45984688
static constexpr ui64 TABLETS_PER_NODE = 10;
45994689
TTestBasicRuntime runtime(3, false);

ydb/core/mind/hive/tablet_info.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -426,14 +426,21 @@ TResourceRawValues TTabletInfo::GetResourceMaximumValues() const {
426426
}
427427
}
428428

429-
i64 TTabletInfo::GetCounterValue(const NKikimrTabletBase::TMetrics& metrics, const TVector<i64>& allowedMetricIds) {
430-
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU) && THive::IsValidMetricsCPU(metrics)) {
429+
i64 TTabletInfo::GetCounterValue() const {
430+
const auto& allowedMetricIds = GetTabletAllowedMetricIds();
431+
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::CPU)
432+
&& (ResourceMetricsAggregates.MaximumCPU.GetAllTimeMaximum() > 0
433+
|| ResourceValues.GetCPU() > 0)) {
431434
return 0;
432435
}
433-
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory) && THive::IsValidMetricsMemory(metrics)) {
436+
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Memory)
437+
&& (ResourceMetricsAggregates.MaximumMemory.GetAllTimeMaximum() > 0
438+
|| ResourceValues.GetMemory() > 0)) {
434439
return 0;
435440
}
436-
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network) && THive::IsValidMetricsNetwork(metrics)) {
441+
if (HasAllowedMetric(allowedMetricIds, EResourceToBalance::Network)
442+
&& (ResourceMetricsAggregates.MaximumNetwork.GetAllTimeMaximum() > 0
443+
|| ResourceValues.GetNetwork() > 0)) {
437444
return 0;
438445
}
439446
return 1;
@@ -474,8 +481,7 @@ void TTabletInfo::FilterRawValues(TResourceNormalizedValues& values) const {
474481
}
475482

476483
// Recomputes this tablet's counter value with GetCounterValue and stores it in
// the Counter field of ResourceValues.
void TTabletInfo::ActualizeCounter() {
477-
auto value = GetCounterValue(ResourceValues, GetTabletAllowedMetricIds());
478-
ResourceValues.SetCounter(value);
484+
ResourceValues.SetCounter(GetCounterValue());
479485
}
480486

481487
const TNodeFilter& TTabletInfo::GetNodeFilter() const {

ydb/core/mind/hive/tablet_info.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ struct TTabletInfo {
233233
void UpdateResourceUsage(const NKikimrTabletBase::TMetrics& metrics);
234234
TResourceRawValues GetResourceCurrentValues() const;
235235
TResourceRawValues GetResourceMaximumValues() const;
236-
static i64 GetCounterValue(const NKikimrTabletBase::TMetrics& metrics, const TVector<i64>& allowedMetricIds);
236+
i64 GetCounterValue() const;
237237
void FilterRawValues(TResourceRawValues& values) const;
238238
void FilterRawValues(TResourceNormalizedValues& values) const;
239239
void ActualizeCounter();

ydb/core/protos/metrics.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ package NKikimrMetricsProto;
33
// Serialized state of a bucketed maximum-value tracker.
// NOTE(review): presumably the storage behind TMaximumValueVariableWindowUI64 in
// ydb/core/util/metrics.h - confirm.
message TMaximumValueUI64 {
44
// Start time of the current bucket; SetValue reads it as milliseconds.
optional uint64 LastBucketStartTime = 1;
55
repeated uint64 Values = 2;
6+
// NOTE(review): appears to hold the largest value passed to SetValue, with the
// initial value ignored - confirm against ydb/core/util/metrics.h.
optional uint64 AllTimeMaximum = 3;
67
}

ydb/core/util/metrics.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,9 @@ class TMaximumValueVariableWindowUI64 : public NKikimrMetricsProto::TMaximumValu
395395
using TProto = NKikimrMetricsProto::TMaximumValueUI64;
396396

397397
void SetValue(TType value, TInstant now = TInstant::Now()) {
398+
if (TProto::GetAllTimeMaximum() > 0 || MaximumValue > 0) { // ignoring initial value
399+
TProto::SetAllTimeMaximum(std::max(value, TProto::GetAllTimeMaximum()));
400+
}
398401
TDuration elapsedCurrentBucket = now - TInstant::MilliSeconds(TProto::GetLastBucketStartTime());
399402
if (TProto::ValuesSize() == 0 || elapsedCurrentBucket >= BucketDuration) {
400403
size_t bucketsPassed = 0;

0 commit comments

Comments
 (0)