Skip to content

Commit a392049

Browse files
[yugabyte#20118] docdb: Add load balancer metrics for blacklist and data move progress
Summary: This diff adds the following three metrics to the load balancer:
- `tablets_in_wrong_placement`: the number of tablets in invalid or blacklisted locations
- `blacklisted_leaders`: the number of tablet leaders in leader blacklisted locations
- `total_table_load_difference`: the sum across tables of each table's load difference (the difference between the number of running or starting tablets on the highest and lowest load tablet servers)

Jira: DB-9054

Test Plan:
```
ybd --cxx-test load_balancer_mini_cluster-test --gtest_filter LoadBalancerMiniClusterTest.TabletsInWrongPlacementMetric
ybd --cxx-test load_balancer_mini_cluster-test --gtest_filter LoadBalancerMiniClusterTest.BlacklistedLeadersMetric
ybd --cxx-test load_balancer_mini_cluster-test --gtest_filter LoadBalancerMiniClusterTest.TableLoadDifferenceMetric
```

Reviewers: mlillibridge
Reviewed By: mlillibridge
Subscribers: ybase, bogdan
Tags: #jenkins-ready
Differential Revision: https://phorge.dev.yugabyte.com/D30650
1 parent 832cf0a commit a392049

File tree

7 files changed

+221
-10
lines changed

7 files changed

+221
-10
lines changed

src/yb/integration-tests/load_balancer_mini_cluster-test.cc

Lines changed: 139 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,31 @@
2929

3030
#include "yb/tools/yb-admin_client.h"
3131

32+
#include "yb/tablet/tablet_peer.h"
33+
3234
#include "yb/tserver/mini_tablet_server.h"
3335
#include "yb/tserver/tablet_server.h"
3436
#include "yb/tserver/tablet_server_options.h"
3537

3638
#include "yb/util/backoff_waiter.h"
39+
#include "yb/util/metrics.h"
3740
#include "yb/util/monotime.h"
3841
#include "yb/util/multi_drive_test_env.h"
3942

43+
METRIC_DECLARE_gauge_uint32(blacklisted_leaders);
4044
METRIC_DECLARE_event_stats(load_balancer_duration);
45+
METRIC_DECLARE_gauge_uint32(tablets_in_wrong_placement);
46+
METRIC_DECLARE_gauge_uint32(total_table_load_difference);
4147

48+
DECLARE_int32(catalog_manager_bg_task_wait_ms);
4249
DECLARE_bool(enable_load_balancing);
4350
DECLARE_bool(load_balancer_drive_aware);
44-
DECLARE_int32(catalog_manager_bg_task_wait_ms);
51+
DECLARE_int32(load_balancer_max_concurrent_moves);
52+
DECLARE_int32(replication_factor);
4553
DECLARE_int32(TEST_slowdown_master_async_rpc_tasks_by_ms);
4654
DECLARE_int32(TEST_load_balancer_wait_ms);
4755
DECLARE_int32(TEST_load_balancer_wait_after_count_pending_tasks_ms);
4856
DECLARE_bool(tserver_heartbeat_metrics_add_drive_data);
49-
DECLARE_int32(load_balancer_max_concurrent_moves);
5057
DECLARE_int32(tserver_heartbeat_metrics_interval_ms);
5158

5259
using namespace std::literals;
@@ -238,6 +245,20 @@ class LoadBalancerMiniClusterTest : public LoadBalancerMiniClusterTestBase {
238245
int num_tablets() override {
239246
return 4;
240247
}
248+
249+
Status AddTserverToBlacklist(size_t idx, bool leader_blacklist) {
250+
HostPort ts_hostport(mini_cluster()->mini_tablet_server(idx)->bound_rpc_addr());
251+
RETURN_NOT_OK(
252+
yb_admin_client_->ChangeBlacklist({ts_hostport}, true /* add */, leader_blacklist));
253+
return Status::OK();
254+
}
255+
256+
Status RemoveTserverFromBlacklist(size_t idx, bool leader_blacklist) {
257+
HostPort ts_hostport(mini_cluster()->mini_tablet_server(idx)->bound_rpc_addr());
258+
RETURN_NOT_OK(
259+
yb_admin_client_->ChangeBlacklist({ts_hostport}, false /* add */, leader_blacklist));
260+
return Status::OK();
261+
}
241262
};
242263

243264
class LoadBalancerMiniClusterRf3Test : public LoadBalancerMiniClusterTest {
@@ -271,6 +292,122 @@ TEST_F(LoadBalancerMiniClusterRf3Test, DurationMetric) {
271292
}, 10s, "load_balancer_duration value resets"));
272293
}
273294

295+
TEST_F(LoadBalancerMiniClusterTest, TabletsInWrongPlacementMetric) {
296+
const int ts_idx = 0;
297+
auto* mini_master = ASSERT_RESULT(mini_cluster()->GetLeaderMiniMaster());
298+
auto cluster_metric_entity = mini_master->master()->metric_entity_cluster();
299+
auto tablets_in_wrong_placement =
300+
cluster_metric_entity->FindOrNull<AtomicGauge<uint32_t>>(METRIC_tablets_in_wrong_placement);
301+
ASSERT_EQ(tablets_in_wrong_placement->value(), 0);
302+
303+
// Prevent moves so we can reliably read the metric and get a non-zero value.
304+
FLAGS_load_balancer_max_concurrent_adds = 0;
305+
306+
unsigned int peers_on_ts = 0;
307+
auto ts_uuid = mini_cluster_->mini_tablet_server(ts_idx)->server()->permanent_uuid();
308+
for (const auto& peer : ListTabletPeers(mini_cluster(), ListPeersFilter::kAll)) {
309+
if (peer->permanent_uuid() == ts_uuid) {
310+
++peers_on_ts;
311+
}
312+
}
313+
ASSERT_GT(peers_on_ts, 0);
314+
315+
ASSERT_EQ(tablets_in_wrong_placement->value(), 0);
316+
317+
// Blacklist first tserver.
318+
ASSERT_OK(AddTserverToBlacklist(ts_idx, false /* leader_blacklist */));
319+
SleepFor(FLAGS_catalog_manager_bg_task_wait_ms * 2ms);
320+
ASSERT_EQ(tablets_in_wrong_placement->value(), peers_on_ts);
321+
322+
// Change placement info to make first tserver invalid.
323+
// The invalid and blacklisted tablets should not be double-counted.
324+
ASSERT_OK(yb_admin_client_->ModifyPlacementInfo("cloud1.rack2.zone,cloud2.rack3.zone", 3, ""));
325+
SleepFor(FLAGS_catalog_manager_bg_task_wait_ms * 2ms);
326+
ASSERT_EQ(tablets_in_wrong_placement->value(), peers_on_ts);
327+
328+
// Unblacklist first tserver.
329+
// The tablets are still invalid, so the metric should not change.
330+
ASSERT_OK(RemoveTserverFromBlacklist(ts_idx, false /* leader_blacklist */));
331+
SleepFor(FLAGS_catalog_manager_bg_task_wait_ms * 2ms);
332+
ASSERT_EQ(tablets_in_wrong_placement->value(), peers_on_ts);
333+
334+
// Change placement info to make first tserver valid again.
335+
ASSERT_OK(yb_admin_client_->ModifyPlacementInfo(
336+
"cloud1.rack1.zone,cloud1.rack2.zone,cloud2.rack3.zone", 3, ""));
337+
ASSERT_OK(WaitFor([&] {
338+
return tablets_in_wrong_placement->value() == 0;
339+
}, 5s, "Wait for tablets_in_wrong_placement to be 0"));
340+
}
341+
342+
TEST_F(LoadBalancerMiniClusterTest, BlacklistedLeadersMetric) {
343+
auto* mini_master = ASSERT_RESULT(mini_cluster()->GetLeaderMiniMaster());
344+
auto cluster_metric_entity = mini_master->master()->metric_entity_cluster();
345+
auto blacklisted_leaders =
346+
cluster_metric_entity->FindOrNull<AtomicGauge<uint32_t>>(METRIC_blacklisted_leaders);
347+
ASSERT_EQ(blacklisted_leaders->value(), 0);
348+
349+
// Prevent leader moves so we can reliably read the metric and get a non-zero value.
350+
FLAGS_load_balancer_max_concurrent_moves = 0;
351+
352+
// Leader blacklist first tserver.
353+
ASSERT_OK(AddTserverToBlacklist(0 /* idx */, true /* leader_blacklist */));
354+
ASSERT_OK(WaitFor([&] {
355+
return blacklisted_leaders->value() > 0;
356+
}, 5s, "Wait for blacklisted_leaders to reflect blacklisted leader"));
357+
}
358+
359+
TEST_F(LoadBalancerMiniClusterTest, TableLoadDifferenceMetric) {
360+
const auto kNumTablets = static_cast<uint32_t>(num_tablets());
361+
auto* mini_master = ASSERT_RESULT(mini_cluster()->GetLeaderMiniMaster());
362+
auto cluster_metric_entity = mini_master->master()->metric_entity_cluster();
363+
auto load_difference_metric = cluster_metric_entity->FindOrNull<AtomicGauge<uint32_t>>(
364+
METRIC_total_table_load_difference);
365+
366+
ASSERT_EQ(load_difference_metric->value(), 0);
367+
368+
// Prevent moves temporarily so we can reliably read the metric and get a non-zero value.
369+
ANNOTATE_UNPROTECTED_WRITE(FLAGS_load_balancer_max_concurrent_adds) = 0;
370+
371+
// Add a new node and wait for load difference to equal kNumTablets (each existing node has
372+
// kNumTablets tablets, and the new node has 0).
373+
auto new_ts_index = mini_cluster()->num_tablet_servers();
374+
ASSERT_OK(mini_cluster()->AddTabletServer());
375+
ASSERT_OK(mini_cluster()->WaitForTabletServerCount(new_ts_index + 1));
376+
ASSERT_OK(WaitFor([&] {
377+
return load_difference_metric->value() == kNumTablets;
378+
}, 5s, "load_difference reflects new node"));
379+
380+
// Enable moves and verify that load_difference monotonically decreases to 0.
381+
auto load_difference_low_water_mark = load_difference_metric->value();
382+
ANNOTATE_UNPROTECTED_WRITE(FLAGS_load_balancer_max_concurrent_adds) = 1;
383+
ASSERT_OK(WaitFor([&]() -> Result<bool> {
384+
auto load_difference = load_difference_metric->value();
385+
if (load_difference > load_difference_low_water_mark) {
386+
return STATUS_FORMAT(
387+
IllegalState, "load_difference unexpectedly increased from $0 to $1",
388+
load_difference_low_water_mark, load_difference);
389+
}
390+
load_difference_low_water_mark = load_difference;
391+
return load_difference == 0 && VERIFY_RESULT(client_->IsLoadBalancerIdle());
392+
}, 30s, "Wait for tablet moves after adding ts-3"));
393+
394+
// Blacklist first tserver.
395+
// The replicas should move off the blacklisted tserver and load_difference should increase to
396+
// kNumTablets again.
397+
auto load_difference_high_water_mark = load_difference_metric->value();
398+
ASSERT_OK(AddTserverToBlacklist(0 /* idx */, false /* leader_blacklist */));
399+
ASSERT_OK(WaitFor([&]() -> Result<bool> {
400+
auto load_difference = load_difference_metric->value();
401+
if (load_difference < load_difference_high_water_mark) {
402+
return STATUS_FORMAT(
403+
IllegalState, "load_difference unexpectedly decreased from $0 to $1",
404+
load_difference_high_water_mark, load_difference);
405+
}
406+
load_difference_high_water_mark = load_difference;
407+
return load_difference == kNumTablets && VERIFY_RESULT(client_->IsLoadBalancerIdle());
408+
}, 30s, "Wait for tablet moves after blacklisting ts-0"));
409+
}
410+
274411
// See issue #6278. This test tests the segfault that used to occur during a rare race condition,
275412
// where we would have an uninitialized TSDescriptor that we try to access.
276413
// To trigger the race condition, we need a pending add task that gets completed after

src/yb/master/cluster_balance.cc

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,33 @@ METRIC_DEFINE_gauge_int64(cluster,
134134
"Is load balancing enabled in the cluster where "
135135
"1 indicates it is enabled.");
136136

137+
METRIC_DEFINE_gauge_uint32(cluster,
138+
tablets_in_wrong_placement,
139+
"Tablets in Wrong/Blacklisted Placement",
140+
yb::MetricUnit::kUnits,
141+
"Number of tablet peers in invalid or blacklisted locations.");
142+
143+
METRIC_DEFINE_gauge_uint32(cluster,
144+
blacklisted_leaders,
145+
"Blacklisted Leaders",
146+
yb::MetricUnit::kUnits,
147+
"Number of tablet leaders in locations from which leaders are "
148+
"blacklisted.");
149+
150+
METRIC_DEFINE_gauge_uint32(cluster,
151+
total_table_load_difference,
152+
"Sum of Table Load Difference",
153+
yb::MetricUnit::kUnits,
154+
"This metric is the sum of every table's load difference, where a "
155+
"table's load difference is the maximum load difference for that table "
156+
"between all pairs of TServers that are valid hosts for its tablets. "
157+
"Here, the load difference for a table between a pair of TServers is "
158+
"the positive difference between the number of running/starting tablet "
159+
"peers belonging to that table hosted on those TServers. Exception: "
160+
"the load difference is defined as 0 if it otherwise would be 1 because "
161+
"the load balancer cannot fix a load difference of 1 by moving a tablet "
162+
"peer from one TServer to another.");
163+
137164
namespace yb {
138165
namespace master {
139166

@@ -255,6 +282,10 @@ size_t ClusterLoadBalancer::get_total_wrong_placement() const {
255282
return state_->tablets_wrong_placement_.size();
256283
}
257284

285+
size_t ClusterLoadBalancer::get_badly_placed_leaders() const {
286+
return state_->tablets_with_badly_placed_leaders_.size();
287+
}
288+
258289
size_t ClusterLoadBalancer::get_total_blacklisted_servers() const {
259290
return global_state_->blacklisted_servers_.size();
260291
}
@@ -312,6 +343,12 @@ ClusterLoadBalancer::~ClusterLoadBalancer() = default;
312343
void ClusterLoadBalancer::InitMetrics() {
313344
is_load_balancing_enabled_metric_ = METRIC_is_load_balancing_enabled.Instantiate(
314345
catalog_manager_->master_->metric_entity_cluster(), 0);
346+
tablets_in_wrong_placement_metric_ = METRIC_tablets_in_wrong_placement.Instantiate(
347+
catalog_manager_->master_->metric_entity_cluster(), 0);
348+
blacklisted_leaders_metric_ = METRIC_blacklisted_leaders.Instantiate(
349+
catalog_manager_->master_->metric_entity_cluster(), 0);
350+
total_table_load_difference_metric_ = METRIC_total_table_load_difference.Instantiate(
351+
catalog_manager_->master_->metric_entity_cluster(), 0);
315352
}
316353

317354
// This function uses the following stratification of vlog levels:
@@ -452,6 +489,10 @@ void ClusterLoadBalancer::RunLoadBalancerWithOptions(Options* options) {
452489
// At the start of the run, report LB state that might prevent it from running smoothly.
453490
ReportUnusualLoadBalancerState();
454491

492+
uint32_t total_tablets_in_wrong_placement = 0;
493+
uint32_t total_blacklisted_leaders = 0;
494+
uint32_t total_table_load_difference = 0;
495+
455496
// Loop over all tables to analyze the global and per-table load.
456497
for (const auto& table : GetTables()) {
457498
if (SkipLoadBalancing(*table)) {
@@ -474,8 +515,29 @@ void ClusterLoadBalancer::RunLoadBalancerWithOptions(Options* options) {
474515
per_table_states_.erase(table->id());
475516
master_errors++;
476517
}
518+
519+
if (!state_->sorted_load_.empty()) {
520+
// Only report values greater than 1 since the LB cannot fix a load difference of 1.
521+
// This makes the total table load difference metric more interpretable since we would
522+
// otherwise add 1 for every table that has a difference of 1 between its most/least loaded
523+
// tservers, even though the table load is balanced.
524+
const auto& low_load_uuid = state_->sorted_load_.front();
525+
const auto& high_load_uuid = state_->sorted_load_.back();
526+
const int actual_load_difference =
527+
narrow_cast<int>(state_->GetLoad(high_load_uuid) - state_->GetLoad(low_load_uuid));
528+
if (actual_load_difference > 1) {
529+
total_table_load_difference += actual_load_difference;
530+
}
531+
}
532+
total_tablets_in_wrong_placement += get_total_wrong_placement();
533+
total_blacklisted_leaders += get_badly_placed_leaders();
477534
}
478535

536+
// Update metrics.
537+
tablets_in_wrong_placement_metric_->set_value(total_tablets_in_wrong_placement);
538+
blacklisted_leaders_metric_->set_value(total_blacklisted_leaders);
539+
total_table_load_difference_metric_->set_value(total_table_load_difference);
540+
479541
VLOG(1) << "Global state after analyzing all tablets: " << global_state_->ToString();
480542

481543
bool task_added = false;

src/yb/master/cluster_balance.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ class ClusterLoadBalancer {
348348
int get_total_running_tablets() const;
349349

350350
size_t get_total_wrong_placement() const;
351+
size_t get_badly_placed_leaders() const;
351352
size_t get_total_blacklisted_servers() const;
352353
size_t get_total_leader_blacklisted_servers() const;
353354

@@ -362,8 +363,10 @@ class ClusterLoadBalancer {
362363
// managed by this class, but by the Master's unique_ptr.
363364
CatalogManager* catalog_manager_;
364365

365-
// Info about if load balancing is enabled in the cluster.
366366
scoped_refptr<AtomicGauge<int64_t>> is_load_balancing_enabled_metric_;
367+
scoped_refptr<AtomicGauge<uint32_t>> tablets_in_wrong_placement_metric_;
368+
scoped_refptr<AtomicGauge<uint32_t>> blacklisted_leaders_metric_;
369+
scoped_refptr<AtomicGauge<uint32_t>> total_table_load_difference_metric_;
367370

368371
std::shared_ptr<YsqlTablespaceManager> tablespace_manager_;
369372

src/yb/master/cluster_balance_util.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,9 @@ Status PerTableLoadState::UpdateTablet(TabletInfo *tablet) {
380380
if (tablet_meta.has_wrong_placements()) {
381381
tablets_wrong_placement_.insert(tablet_id);
382382
}
383+
if (tablet_meta.has_badly_placed_leader()) {
384+
tablets_with_badly_placed_leaders_.insert(tablet_id);
385+
}
383386

384387
return Status::OK();
385388
}

src/yb/master/cluster_balance_util.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ struct CBTabletMetadata {
5757
return !wrong_placement_tablet_servers.empty() || !blacklisted_tablet_servers.empty();
5858
}
5959

60-
bool has_blacklisted_leader() {
60+
bool has_badly_placed_leader() {
6161
return !leader_blacklisted_tablet_servers.empty();
6262
}
6363

@@ -427,7 +427,8 @@ class PerTableLoadState {
427427
out << Format("tablets_missing_replicas: $0, ", tablets_missing_replicas_);
428428
out << Format("tablets_over_replicated: $0, ", tablets_over_replicated_);
429429
out << Format("tablets_wrong_placement: $0, ", tablets_wrong_placement_);
430-
out << Format("tablets_added: $0, ", tablets_wrong_placement_);
430+
out << Format("tablets_with_badly_placed_leaders: $0, ", tablets_with_badly_placed_leaders_);
431+
out << Format("tablets_added: $0, ", tablets_added_);
431432
out << Format("leader_balance_threshold: $0, ", leader_balance_threshold_);
432433
out << Format("sorted_leader_load: $0, ", sorted_leader_load_);
433434
out << Format("use_preferred_zones: $0, ", use_preferred_zones_);
@@ -482,6 +483,9 @@ class PerTableLoadState {
482483
// Set of tablet ids that have been determined to have replicas in incorrect placements.
483484
std::set<TabletId> tablets_wrong_placement_;
484485

486+
// Set of tablet ids that have leaders in leader blacklisted placements.
487+
std::set<TabletId> tablets_with_badly_placed_leaders_;
488+
485489
// List of tablet ids that have been added to a new tablet server.
486490
std::set<TabletId> tablets_added_;
487491

src/yb/util/metric_entity.cc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -176,11 +176,6 @@ void MetricEntity::CheckInstantiation(const MetricPrototype* proto) const {
176176
<< "Metric name is not compatible with Prometheus: " << proto->name();
177177
}
178178

179-
scoped_refptr<Metric> MetricEntity::FindOrNull(const MetricPrototype& prototype) const {
180-
std::lock_guard l(lock_);
181-
return FindPtrOrNull(metric_map_, &prototype);
182-
}
183-
184179
bool MetricEntity::TEST_ContainMetricName(const std::string& metric_name) const {
185180
std::lock_guard l(lock_);
186181
for (const MetricMap::value_type& val : metric_map_) {

src/yb/util/metric_entity.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ class MetricEntity : public RefCountedThreadSafe<MetricEntity> {
149149

150150
// Return the metric instantiated from the given prototype, or NULL if none has been
151151
// instantiated. Primarily used by tests trying to read metric values.
152+
template<typename Metric>
152153
scoped_refptr<Metric> FindOrNull(const MetricPrototype& prototype) const;
153154

154155
const std::string& id() const { return id_; }
@@ -248,6 +249,12 @@ scoped_refptr<Metric> MetricEntity::FindOrCreateMetric(PrototypePtr proto, Args&
248249
return m;
249250
}
250251

252+
template<typename Metric>
253+
scoped_refptr<Metric> MetricEntity::FindOrNull(const MetricPrototype& prototype) const {
254+
std::lock_guard l(lock_);
255+
return down_cast<Metric*>(FindPtrOrNull(metric_map_, &prototype).get());
256+
}
257+
251258
void WriteRegistryAsJson(JsonWriter* writer);
252259

253260
} // namespace yb

0 commit comments

Comments
 (0)