Skip to content

Commit 7b5d70b

Browse files
busbey authored and meiyi committed
HBASE-24138 log more details about balancer decisions for StochasticLoadBalancer (apache#1455)
- at DEBUG log messages about RegionCountSkewCostFunction region/server totals
- at DEBUG log messages about the decision to balance or not with total costs
- at TRACE log messages about region count on each server RegionCountSkewCostFunction sees
- at TRACE log message with the individual cost functions used in the decision to balance or not

Signed-off-by: Viraj Jasani <vjasani@apache.org>
(cherry picked from commit 2d78a28)
1 parent 407c74a commit 7b5d70b

File tree

3 files changed

+37
-17
lines changed

3 files changed

+37
-17
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ServerAndLoad.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,4 +65,9 @@ public boolean equals(Object o) {
6565
}
6666
return false;
6767
}
68+
69+
@Override
70+
public String toString() {
71+
return "server=" + sn + " , load=" + load;
72+
}
6873
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -330,28 +330,29 @@ protected boolean needsBalance(TableName tableName, Cluster cluster) {
330330
for (CostFunction c : costFunctions) {
331331
float multiplier = c.getMultiplier();
332332
if (multiplier <= 0) {
333+
LOG.trace("{} not needed because multiplier is <= 0", c.getClass().getSimpleName());
333334
continue;
334335
}
335336
if (!c.isNeeded()) {
336-
LOG.debug("{} not needed", c.getClass().getSimpleName());
337+
LOG.trace("{} not needed", c.getClass().getSimpleName());
337338
continue;
338339
}
339340
sumMultiplier += multiplier;
340341
total += c.cost() * multiplier;
341342
}
342343

343-
if (total <= 0 || sumMultiplier <= 0
344-
|| (sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance)) {
344+
boolean balanced = total <= 0 || sumMultiplier <= 0 ||
345+
(sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
346+
if (LOG.isDebugEnabled()) {
347+
LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to need a balance is {}",
348+
balanced ? "Skipping load balancing because balanced" : "We need to load balance",
349+
isByTable ? String.format("table (%s)", tableName) : "cluster",
350+
total, sumMultiplier, minCostNeedBalance);
345351
if (LOG.isTraceEnabled()) {
346-
final String loadBalanceTarget =
347-
isByTable ? String.format("table (%s)", tableName) : "cluster";
348-
LOG.trace("Skipping load balancing because the {} is balanced. Total cost: {}, "
349-
+ "Sum multiplier: {}, Minimum cost needed for balance: {}", loadBalanceTarget, total,
350-
sumMultiplier, minCostNeedBalance);
352+
LOG.trace("Balance decision detailed function costs={}", functionCost());
351353
}
352-
return false;
353354
}
354-
return true;
355+
return !balanced;
355356
}
356357

357358
@VisibleForTesting
@@ -1188,16 +1189,27 @@ static class RegionCountSkewCostFunction extends CostFunction {
11881189
this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
11891190
}
11901191

1192+
@Override
1193+
void init(Cluster cluster) {
1194+
super.init(cluster);
1195+
LOG.debug("{} sees a total of {} servers and {} regions.", getClass().getSimpleName(),
1196+
cluster.numServers, cluster.numRegions);
1197+
if (LOG.isTraceEnabled()) {
1198+
for (int i =0; i < cluster.numServers; i++) {
1199+
LOG.trace("{} sees server '{}' has {} regions", getClass().getSimpleName(),
1200+
cluster.servers[i], cluster.regionsPerServer[i].length);
1201+
}
1202+
}
1203+
}
1204+
11911205
@Override
11921206
protected double cost() {
11931207
if (stats == null || stats.length != cluster.numServers) {
11941208
stats = new double[cluster.numServers];
11951209
}
1196-
11971210
for (int i =0; i < cluster.numServers; i++) {
11981211
stats[i] = cluster.regionsPerServer[i].length;
11991212
}
1200-
12011213
return costFromArray(stats);
12021214
}
12031215
}

hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -201,9 +201,11 @@ public void assertClusterAsBalanced(List<ServerAndLoad> servers) {
201201
int max = numRegions % numServers == 0 ? min : min + 1;
202202

203203
for (ServerAndLoad server : servers) {
204-
assertTrue(server.getLoad() >= 0);
205-
assertTrue(server.getLoad() <= max);
206-
assertTrue(server.getLoad() >= min);
204+
assertTrue("All servers should have a positive load. " + server, server.getLoad() >= 0);
205+
assertTrue("All servers should have load no more than " + max + ". " + server,
206+
server.getLoad() <= max);
207+
assertTrue("All servers should have load no less than " + min + ". " + server,
208+
server.getLoad() >= min);
207209
}
208210
}
209211

@@ -556,7 +558,7 @@ protected void testWithCluster(Map<ServerName, List<RegionInfo>> serverMap,
556558
Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable =
557559
(Map) mockClusterServersWithTables(serverMap);
558560
List<RegionPlan> plans = loadBalancer.balanceCluster(LoadOfAllTable);
559-
assertNotNull(plans);
561+
assertNotNull("Initial cluster balance should produce plans.", plans);
560562

561563
// Check to see that this actually got to a stable place.
562564
if (assertFullyBalanced || assertFullyBalancedForReplicas) {
@@ -570,7 +572,8 @@ protected void testWithCluster(Map<ServerName, List<RegionInfo>> serverMap,
570572
assertClusterAsBalanced(balancedCluster);
571573
LoadOfAllTable = (Map) mockClusterServersWithTables(serverMap);
572574
List<RegionPlan> secondPlans = loadBalancer.balanceCluster(LoadOfAllTable);
573-
assertNull(secondPlans);
575+
assertNull("Given a requirement to be fully balanced, second attempt at plans should " +
576+
"produce none.", secondPlans);
574577
}
575578

576579
if (assertFullyBalancedForReplicas) {

0 commit comments

Comments
 (0)