Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion ydb/core/mind/hive/hive_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2382,7 +2382,9 @@ void THive::Handle(TEvPrivate::TEvProcessStorageBalancer::TPtr&) {
auto& [stats, pool] = *std::max_element(poolStats.begin(), poolStats.end(), [](const TPoolStat& lhs, const TPoolStat& rhs) {
return lhs.first.Scatter < rhs.first.Scatter;
});
if (stats.Scatter > GetMinStorageScatterToBalance()) {
StorageScatter = stats.Scatter;
TabletCounters->Simple()[NHive::COUNTER_STORAGE_SCATTER].Set(StorageScatter * 100);
if (StorageScatter > GetMinStorageScatterToBalance()) {
BLOG_D("Storage Scatter = " << stats.Scatter << " in pool " << pool.Name << ", starting StorageBalancer");
ui64 numReassigns = 1;
auto it = pool.Groups.find(stats.MaxUsageGroupId);
Expand All @@ -2395,6 +2397,7 @@ void THive::Handle(TEvPrivate::TEvProcessStorageBalancer::TPtr&) {
}
StartHiveStorageBalancer({
.NumReassigns = numReassigns,
.MaxInFlight = GetStorageBalancerInflight(),
.StoragePool = pool.Name
});
}
Expand Down
6 changes: 6 additions & 0 deletions ydb/core/mind/hive/hive_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
friend class TTxTabletOwnersReply;
friend class TTxRequestTabletOwners;
friend class TTxUpdateTabletsObject;
friend class TTxUpdateTabletGroups;

friend class TDeleteTabletActor;

Expand Down Expand Up @@ -327,6 +328,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
ui32 DataCenters = 1;
ui32 RegisteredDataCenters = 1;
TObjectDistributions ObjectDistributions;
double StorageScatter = 0;

// True when RootHiveId equals this Hive's own HiveId.
bool AreWeRootHive() const { return RootHiveId == HiveId; }
// True when RootHiveId differs from this Hive's own HiveId (the exact negation of AreWeRootHive).
bool AreWeSubDomainHive() const { return RootHiveId != HiveId; }
Expand Down Expand Up @@ -925,6 +927,10 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
return CurrentConfig.GetMinStorageScatterToBalance();
}

// Returns the StorageBalancerInflight setting read from CurrentConfig.
// The value is passed as MaxInFlight when the storage balancer is started.
ui64 GetStorageBalancerInflight() const {
return CurrentConfig.GetStorageBalancerInflight();
}

static void ActualizeRestartStatistics(google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier);
static ui64 GetRestartsPerPeriod(const google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier);
static bool IsSystemTablet(TTabletTypes::EType type);
Expand Down
1 change: 1 addition & 0 deletions ydb/core/mind/hive/hive_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2786,6 +2786,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
Setup(runtime, true, 2, [](TAppPrepare& app) {
app.HiveConfig.SetMinPeriodBetweenReassign(0);
app.HiveConfig.SetStorageInfoRefreshFrequency(200);
app.HiveConfig.SetMinStorageScatterToBalance(0.5);
});
const ui64 hiveTablet = MakeDefaultHiveID(0);
const ui64 testerTablet = MakeDefaultHiveID(1);
Expand Down
15 changes: 15 additions & 0 deletions ydb/core/mind/hive/monitoring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,12 @@ class TTxMonEvent_Settings : public TTransactionBase<THive> {
UpdateConfig(db, "MaxWarmUpPeriod");
UpdateConfig(db, "WarmUpEnabled");
UpdateConfig(db, "ObjectImbalanceToBalance");
UpdateConfig(db, "ChannelBalanceStrategy");
UpdateConfig(db, "MaxChannelHistorySize");
UpdateConfig(db, "StorageInfoRefreshFrequency");
UpdateConfig(db, "MinStorageScatterToBalance");
UpdateConfig(db, "MinGroupUsageToBalance");
UpdateConfig(db, "StorageBalancerInflight");

if (params.contains("BalancerIgnoreTabletTypes")) {
TVector<TString> tabletTypeNames = SplitString(params.Get("BalancerIgnoreTabletTypes"), ";");
Expand Down Expand Up @@ -1111,6 +1117,12 @@ class TTxMonEvent_Settings : public TTransactionBase<THive> {
ShowConfig(out, "MaxWarmUpPeriod");
ShowConfig(out, "WarmUpEnabled");
ShowConfig(out, "ObjectImbalanceToBalance");
ShowConfig(out, "ChannelBalanceStrategy");
ShowConfig(out, "MaxChannelHistorySize");
ShowConfig(out, "StorageInfoRefreshFrequency");
ShowConfig(out, "MinStorageScatterToBalance");
ShowConfig(out, "MinGroupUsageToBalance");
ShowConfig(out, "StorageBalancerInflight");
ShowConfigForBalancerIgnoreTabletTypes(out);

out << "<div class='row' style='margin-top:40px'>";
Expand Down Expand Up @@ -1405,6 +1417,7 @@ class TTxMonEvent_Landing : public TTransactionBase<THive> {
out << "<tr><td>Network</td><td id='resourceScatterNetwork'></td></tr>";
out << "<tr><td>MaxUsage</td><td id='maxUsage'></td></tr>";
out << "<tr><td>Imbalance</td><td id='objectImbalance'></td></tr>";
out << "<tr><td>Storage</td><td id='storageScatter'></td></tr>";
out << "</table></div>";
out << "<div style='min-width:220px'><table class='simple-table3'>";
out << "<tr><th>Balancer</th><th style='min-width:50px'>Runs</th><th style='min-width:50px'>Moves</th>";
Expand Down Expand Up @@ -1896,6 +1909,7 @@ function fillDataShort(result) {
$('#waitQueue').html(result.WaitQueueSize);
$('#maxUsage').html(result.MaxUsage);
$('#objectImbalance').html(result.ObjectImbalance);
$('#storageScatter').html(result.StorageScatter);

$('#resourceTotalCounter').html(result.ResourceTotal.Counter);
$('#resourceTotalCPU').html(result.ResourceTotal.CPU);
Expand Down Expand Up @@ -2205,6 +2219,7 @@ class TTxMonEvent_LandingData : public TTransactionBase<THive> {
jsonData["ScatterHtml"]["Memory"] = std::get<NMetrics::EResource::Memory>(scatterHtml);
jsonData["ScatterHtml"]["Network"] = std::get<NMetrics::EResource::Network>(scatterHtml);
jsonData["ObjectImbalance"] = GetValueWithColoredGlyph(Self->ObjectDistributions.GetMaxImbalance(), Self->GetObjectImbalanceToBalance());
jsonData["StorageScatter"] = GetValueWithColoredGlyph(Self->StorageScatter, Self->GetMinStorageScatterToBalance());
jsonData["WarmUp"] = Self->WarmUp;

if (Cgi.Get("nodes") == "1") {
Expand Down
1 change: 1 addition & 0 deletions ydb/core/mind/hive/tx__update_tablet_groups.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ class TTxUpdateTabletGroups : public TTransactionBase<THive> {
// Use best effort to kill currently running tablet
SideEffects.Register(CreateTabletKiller(TabletId, /* nodeId */ 0, tablet->KnownGeneration));
}
SideEffects.Callback([counters = Self->TabletCounters] { counters->Cumulative()[NHive::COUNTER_TABLETS_STORAGE_REASSIGNED].Increment(1); });
}
if (needToIncreaseGeneration) {
tablet->IncreaseGeneration();
Expand Down
9 changes: 5 additions & 4 deletions ydb/core/protos/config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1375,9 +1375,9 @@ message THiveConfig {
}

enum EHiveChannelBalanceStrategy {
HIVE_CHANNEL_BALANCE_STRATEGY_HEAVIEST = 1;
HIVE_CHANNEL_BALANCE_STRATEGY_RANDOM = 2;
HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM = 3;
HIVE_CHANNEL_BALANCE_STRATEGY_HEAVIEST = 0;
HIVE_CHANNEL_BALANCE_STRATEGY_RANDOM = 1;
HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM = 2;
Comment on lines +1378 to +1380
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This introduces compatibility issues with config versions?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does, but this is the PR that adds the ability to use this enum through the web UI. The enum is not in any of the stable releases, and I don't believe anyone has a YAML config with it anywhere at this point.
(I changed it because it turns out C++ proto enum descriptors assume that enum values are consecutive and start with zero)

}

enum EHiveNodeSelectStrategy {
Expand Down Expand Up @@ -1460,8 +1460,9 @@ message THiveConfig {
optional EHiveChannelBalanceStrategy ChannelBalanceStrategy = 68 [default = HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM];
optional uint64 MaxChannelHistorySize = 69 [default = 200];
optional uint64 StorageInfoRefreshFrequency = 70 [default = 600000]; // send a query to BSC every x milliseconds
optional double MinStorageScatterToBalance = 71 [default = 0.5];
optional double MinStorageScatterToBalance = 71 [default = 999]; // storage balancer trigger is disabled by default
optional double MinGroupUsageToBalance = 72 [default = 0.1];
optional uint64 StorageBalancerInflight = 73 [default = 1];
}

message TColumnShardConfig {
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/protos/counters_hive.proto
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ enum ESimpleCounters {
COUNTER_BALANCE_OBJECT_IMBALANCE = 18 [(CounterOpts) = {Name: "BalanceObjectImbalance"}];
COUNTER_IMBALANCED_OBJECTS = 19 [(CounterOpts) = {Name: "ImbalancedObjects"}];
COUNTER_WORST_OBJECT_VARIANCE = 20 [(CounterOpts) = {Name: "WorstObjectVariance"}];
COUNTER_STORAGE_SCATTER = 21 [(CounterOpts) = {Name: "StorageScatter"}];
}

enum ECumulativeCounters {
Expand All @@ -44,6 +45,7 @@ enum ECumulativeCounters {
COUNTER_SUGGESTED_SCALE_UP = 10 [(CounterOpts) = {Name: "SuggestedScaleUp"}];
COUNTER_SUGGESTED_SCALE_DOWN = 11 [(CounterOpts) = {Name: "SuggestedScaleDown"}];
COUNTER_STORAGE_BALANCER_EXECUTED = 12 [(CounterOpts) = {Name: "StorageBalancerExecuted"}];
COUNTER_TABLETS_STORAGE_REASSIGNED = 13 [(CounterOpts) = {Name: "TabletsStorageReassigned"}];
}

enum EPercentileCounters {
Expand Down