|
29 | 29 |
|
30 | 30 | #include "yb/tools/yb-admin_client.h"
|
31 | 31 |
|
| 32 | +#include "yb/tablet/tablet_peer.h" |
| 33 | + |
32 | 34 | #include "yb/tserver/mini_tablet_server.h"
|
33 | 35 | #include "yb/tserver/tablet_server.h"
|
34 | 36 | #include "yb/tserver/tablet_server_options.h"
|
35 | 37 |
|
36 | 38 | #include "yb/util/backoff_waiter.h"
|
| 39 | +#include "yb/util/metrics.h" |
37 | 40 | #include "yb/util/monotime.h"
|
38 | 41 | #include "yb/util/multi_drive_test_env.h"
|
39 | 42 |
|
| 43 | +METRIC_DECLARE_gauge_uint32(blacklisted_leaders); |
40 | 44 | METRIC_DECLARE_event_stats(load_balancer_duration);
|
| 45 | +METRIC_DECLARE_gauge_uint32(tablets_in_wrong_placement); |
| 46 | +METRIC_DECLARE_gauge_uint32(total_table_load_difference); |
41 | 47 |
|
| 48 | +DECLARE_int32(catalog_manager_bg_task_wait_ms); |
42 | 49 | DECLARE_bool(enable_load_balancing);
|
43 | 50 | DECLARE_bool(load_balancer_drive_aware);
|
44 |
| -DECLARE_int32(catalog_manager_bg_task_wait_ms); |
| 51 | +DECLARE_int32(load_balancer_max_concurrent_moves); |
| 52 | +DECLARE_int32(replication_factor); |
45 | 53 | DECLARE_int32(TEST_slowdown_master_async_rpc_tasks_by_ms);
|
46 | 54 | DECLARE_int32(TEST_load_balancer_wait_ms);
|
47 | 55 | DECLARE_int32(TEST_load_balancer_wait_after_count_pending_tasks_ms);
|
48 | 56 | DECLARE_bool(tserver_heartbeat_metrics_add_drive_data);
|
49 |
| -DECLARE_int32(load_balancer_max_concurrent_moves); |
50 | 57 | DECLARE_int32(tserver_heartbeat_metrics_interval_ms);
|
51 | 58 |
|
52 | 59 | using namespace std::literals;
|
@@ -238,6 +245,20 @@ class LoadBalancerMiniClusterTest : public LoadBalancerMiniClusterTestBase {
|
238 | 245 | int num_tablets() override {
|
239 | 246 | return 4;
|
240 | 247 | }
|
| 248 | + |
| 249 | + Status AddTserverToBlacklist(size_t idx, bool leader_blacklist) { |
| 250 | + HostPort ts_hostport(mini_cluster()->mini_tablet_server(idx)->bound_rpc_addr()); |
| 251 | + RETURN_NOT_OK( |
| 252 | + yb_admin_client_->ChangeBlacklist({ts_hostport}, true /* add */, leader_blacklist)); |
| 253 | + return Status::OK(); |
| 254 | + } |
| 255 | + |
| 256 | + Status RemoveTserverFromBlacklist(size_t idx, bool leader_blacklist) { |
| 257 | + HostPort ts_hostport(mini_cluster()->mini_tablet_server(idx)->bound_rpc_addr()); |
| 258 | + RETURN_NOT_OK( |
| 259 | + yb_admin_client_->ChangeBlacklist({ts_hostport}, false /* add */, leader_blacklist)); |
| 260 | + return Status::OK(); |
| 261 | + } |
241 | 262 | };
|
242 | 263 |
|
243 | 264 | class LoadBalancerMiniClusterRf3Test : public LoadBalancerMiniClusterTest {
|
@@ -271,6 +292,122 @@ TEST_F(LoadBalancerMiniClusterRf3Test, DurationMetric) {
|
271 | 292 | }, 10s, "load_balancer_duration value resets"));
|
272 | 293 | }
|
273 | 294 |
|
| 295 | +TEST_F(LoadBalancerMiniClusterTest, TabletsInWrongPlacementMetric) { |
| 296 | + const int ts_idx = 0; |
| 297 | + auto* mini_master = ASSERT_RESULT(mini_cluster()->GetLeaderMiniMaster()); |
| 298 | + auto cluster_metric_entity = mini_master->master()->metric_entity_cluster(); |
| 299 | + auto tablets_in_wrong_placement = |
| 300 | + cluster_metric_entity->FindOrNull<AtomicGauge<uint32_t>>(METRIC_tablets_in_wrong_placement); |
| 301 | + ASSERT_EQ(tablets_in_wrong_placement->value(), 0); |
| 302 | + |
| 303 | + // Prevent moves so we can reliably read the metric and get a non-zero value. |
| 304 | + FLAGS_load_balancer_max_concurrent_adds = 0; |
| 305 | + |
| 306 | + unsigned int peers_on_ts = 0; |
| 307 | + auto ts_uuid = mini_cluster_->mini_tablet_server(ts_idx)->server()->permanent_uuid(); |
| 308 | + for (const auto& peer : ListTabletPeers(mini_cluster(), ListPeersFilter::kAll)) { |
| 309 | + if (peer->permanent_uuid() == ts_uuid) { |
| 310 | + ++peers_on_ts; |
| 311 | + } |
| 312 | + } |
| 313 | + ASSERT_GT(peers_on_ts, 0); |
| 314 | + |
| 315 | + ASSERT_EQ(tablets_in_wrong_placement->value(), 0); |
| 316 | + |
| 317 | + // Blacklist first tserver. |
| 318 | + ASSERT_OK(AddTserverToBlacklist(ts_idx, false /* leader_blacklist */)); |
| 319 | + SleepFor(FLAGS_catalog_manager_bg_task_wait_ms * 2ms); |
| 320 | + ASSERT_EQ(tablets_in_wrong_placement->value(), peers_on_ts); |
| 321 | + |
| 322 | + // Change placement info to make first tserver invalid. |
| 323 | + // The invalid and blacklisted tablets should not be double-counted. |
| 324 | + ASSERT_OK(yb_admin_client_->ModifyPlacementInfo("cloud1.rack2.zone,cloud2.rack3.zone", 3, "")); |
| 325 | + SleepFor(FLAGS_catalog_manager_bg_task_wait_ms * 2ms); |
| 326 | + ASSERT_EQ(tablets_in_wrong_placement->value(), peers_on_ts); |
| 327 | + |
| 328 | + // Unblacklist first tserver. |
| 329 | + // The tablets are still invalid, so the metric should not change. |
| 330 | + ASSERT_OK(RemoveTserverFromBlacklist(ts_idx, false /* leader_blacklist */)); |
| 331 | + SleepFor(FLAGS_catalog_manager_bg_task_wait_ms * 2ms); |
| 332 | + ASSERT_EQ(tablets_in_wrong_placement->value(), peers_on_ts); |
| 333 | + |
| 334 | + // Change placement info to make first tserver valid again. |
| 335 | + ASSERT_OK(yb_admin_client_->ModifyPlacementInfo( |
| 336 | + "cloud1.rack1.zone,cloud1.rack2.zone,cloud2.rack3.zone", 3, "")); |
| 337 | + ASSERT_OK(WaitFor([&] { |
| 338 | + return tablets_in_wrong_placement->value() == 0; |
| 339 | + }, 5s, "Wait for tablets_in_wrong_placement to be 0")); |
| 340 | +} |
| 341 | + |
| 342 | +TEST_F(LoadBalancerMiniClusterTest, BlacklistedLeadersMetric) { |
| 343 | + auto* mini_master = ASSERT_RESULT(mini_cluster()->GetLeaderMiniMaster()); |
| 344 | + auto cluster_metric_entity = mini_master->master()->metric_entity_cluster(); |
| 345 | + auto blacklisted_leaders = |
| 346 | + cluster_metric_entity->FindOrNull<AtomicGauge<uint32_t>>(METRIC_blacklisted_leaders); |
| 347 | + ASSERT_EQ(blacklisted_leaders->value(), 0); |
| 348 | + |
| 349 | + // Prevent leader moves so we can reliably read the metric and get a non-zero value. |
| 350 | + FLAGS_load_balancer_max_concurrent_moves = 0; |
| 351 | + |
| 352 | + // Leader blacklist first tserver. |
| 353 | + ASSERT_OK(AddTserverToBlacklist(0 /* idx */, true /* leader_blacklist */)); |
| 354 | + ASSERT_OK(WaitFor([&] { |
| 355 | + return blacklisted_leaders->value() > 0; |
| 356 | + }, 5s, "Wait for blacklisted_leaders to reflect blacklisted leader")); |
| 357 | +} |
| 358 | + |
| 359 | +TEST_F(LoadBalancerMiniClusterTest, TableLoadDifferenceMetric) { |
| 360 | + const auto kNumTablets = static_cast<uint32_t>(num_tablets()); |
| 361 | + auto* mini_master = ASSERT_RESULT(mini_cluster()->GetLeaderMiniMaster()); |
| 362 | + auto cluster_metric_entity = mini_master->master()->metric_entity_cluster(); |
| 363 | + auto load_difference_metric = cluster_metric_entity->FindOrNull<AtomicGauge<uint32_t>>( |
| 364 | + METRIC_total_table_load_difference); |
| 365 | + |
| 366 | + ASSERT_EQ(load_difference_metric->value(), 0); |
| 367 | + |
| 368 | + // Prevent moves temporarily so we can reliably read the metric and get a non-zero value. |
| 369 | + ANNOTATE_UNPROTECTED_WRITE(FLAGS_load_balancer_max_concurrent_adds) = 0; |
| 370 | + |
| 371 | + // Add a new node and wait for load difference to equal kNumTablets (each existing node has |
| 372 | + // kNumTablets tablets, and the new node has 0). |
| 373 | + auto new_ts_index = mini_cluster()->num_tablet_servers(); |
| 374 | + ASSERT_OK(mini_cluster()->AddTabletServer()); |
| 375 | + ASSERT_OK(mini_cluster()->WaitForTabletServerCount(new_ts_index + 1)); |
| 376 | + ASSERT_OK(WaitFor([&] { |
| 377 | + return load_difference_metric->value() == kNumTablets; |
| 378 | + }, 5s, "load_difference reflects new node")); |
| 379 | + |
| 380 | + // Enable moves and verify that load_difference monotonically decreases to 0. |
| 381 | + auto load_difference_low_water_mark = load_difference_metric->value(); |
| 382 | + ANNOTATE_UNPROTECTED_WRITE(FLAGS_load_balancer_max_concurrent_adds) = 1; |
| 383 | + ASSERT_OK(WaitFor([&]() -> Result<bool> { |
| 384 | + auto load_difference = load_difference_metric->value(); |
| 385 | + if (load_difference > load_difference_low_water_mark) { |
| 386 | + return STATUS_FORMAT( |
| 387 | + IllegalState, "load_difference unexpectedly increased from $0 to $1", |
| 388 | + load_difference_low_water_mark, load_difference); |
| 389 | + } |
| 390 | + load_difference_low_water_mark = load_difference; |
| 391 | + return load_difference == 0 && VERIFY_RESULT(client_->IsLoadBalancerIdle()); |
| 392 | + }, 30s, "Wait for tablet moves after adding ts-3")); |
| 393 | + |
| 394 | + // Blacklist first tserver. |
| 395 | + // The replicas should move off the blacklisted tserver and load_difference should increase to |
| 396 | + // kNumTablets again. |
| 397 | + auto load_difference_high_water_mark = load_difference_metric->value(); |
| 398 | + ASSERT_OK(AddTserverToBlacklist(0 /* idx */, false /* leader_blacklist */)); |
| 399 | + ASSERT_OK(WaitFor([&]() -> Result<bool> { |
| 400 | + auto load_difference = load_difference_metric->value(); |
| 401 | + if (load_difference < load_difference_high_water_mark) { |
| 402 | + return STATUS_FORMAT( |
| 403 | + IllegalState, "load_difference unexpectedly decreased from $0 to $1", |
| 404 | + load_difference_high_water_mark, load_difference); |
| 405 | + } |
| 406 | + load_difference_high_water_mark = load_difference; |
| 407 | + return load_difference == kNumTablets && VERIFY_RESULT(client_->IsLoadBalancerIdle()); |
| 408 | + }, 30s, "Wait for tablet moves after blacklisting ts-0")); |
| 409 | +} |
| 410 | + |
274 | 411 | // See issue #6278. This test tests the segfault that used to occur during a rare race condition,
|
275 | 412 | // where we would have an uninitialized TSDescriptor that we try to access.
|
276 | 413 | // To trigger the race condition, we need a pending add task that gets completed after
|
|
0 commit comments