Skip to content

Commit 4b94f2e

Browse files
committed
HBASE-24138 log more details about balancer decisions for StochasticLoadBalancer (#1455)
- at DEBUG log messages about RegionCountSkewCostFunction region/server totals
- at DEBUG log messages about the decision to balance or not with total costs
- at TRACE log messages about region count on each server RegionCountSkewCostFunction sees
- at TRACE log message with the individual cost functions used in the decision to balance or not

Signed-off-by: Viraj Jasani <vjasani@apache.org>
(cherry picked from commit 2d78a28)
1 parent 27f512d commit 4b94f2e

File tree

3 files changed

+44
-17
lines changed

3 files changed

+44
-17
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ServerAndLoad.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,4 +65,9 @@ public boolean equals(Object o) {
6565
}
6666
return false;
6767
}
68+
69+
@Override
70+
public String toString() {
71+
return "server=" + sn + " , load=" + load;
72+
}
6873
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -334,26 +334,34 @@ protected boolean needsBalance(Cluster cluster) {
334334
for (CostFunction c : costFunctions) {
335335
float multiplier = c.getMultiplier();
336336
if (multiplier <= 0) {
337+
if (LOG.isTraceEnabled()) {
338+
LOG.trace(c.getClass().getSimpleName() + " not needed because multiplier is <= 0");
339+
}
337340
continue;
338341
}
339342
if (!c.isNeeded()) {
340-
LOG.debug(c.getClass().getName() + " indicated that its cost should not be considered");
343+
if (LOG.isTraceEnabled()) {
344+
LOG.trace(c.getClass().getSimpleName() + " not needed");
345+
}
341346
continue;
342347
}
343348
sumMultiplier += multiplier;
344349
total += c.cost() * multiplier;
345350
}
346351

347-
if (total <= 0 || sumMultiplier <= 0
348-
|| (sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance)) {
349-
final String loadBalanceTarget =
350-
isByTable ? String.format("table (%s)", tableName) : "cluster";
351-
LOG.info(String.format("Skipping load balancing because the %s is balanced. Total cost: %s, "
352-
+ "Sum multiplier: %s, Minimum cost needed for balance: %s", loadBalanceTarget, total,
353-
sumMultiplier, minCostNeedBalance));
354-
return false;
352+
boolean balanced = total <= 0 || sumMultiplier <= 0 ||
353+
(sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
354+
if (LOG.isDebugEnabled()) {
355+
LOG.debug(
356+
(balanced ? "Skipping load balancing because balanced" : "We need to load balance") +
357+
" " + (isByTable ? String.format("table (%s)", tableName) : "cluster") +
358+
"; total cost=" + total + ", sum multiplier=" + sumMultiplier + "; cost/multiplier to " +
359+
"need a balance is " + minCostNeedBalance);
360+
if (LOG.isTraceEnabled()) {
361+
LOG.trace("Balance decision detailed function costs=" + functionCost());
362+
}
355363
}
356-
return true;
364+
return !balanced;
357365
}
358366

359367
@Override
@@ -1207,16 +1215,27 @@ static class RegionCountSkewCostFunction extends CostFunction {
12071215
this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
12081216
}
12091217

1218+
@Override
1219+
void init(Cluster cluster) {
1220+
super.init(cluster);
1221+
LOG.debug(getClass().getSimpleName() + " sees a total of " + cluster.numServers +
1222+
" servers and " + cluster.numRegions + " regions.");
1223+
if (LOG.isTraceEnabled()) {
1224+
for (int i =0; i < cluster.numServers; i++) {
1225+
LOG.trace(getClass().getSimpleName() + " sees server '" + cluster.servers[i] +
1226+
"' has " + cluster.regionsPerServer[i].length + " regions");
1227+
}
1228+
}
1229+
}
1230+
12101231
@Override
12111232
protected double cost() {
12121233
if (stats == null || stats.length != cluster.numServers) {
12131234
stats = new double[cluster.numServers];
12141235
}
1215-
12161236
for (int i =0; i < cluster.numServers; i++) {
12171237
stats[i] = cluster.regionsPerServer[i].length;
12181238
}
1219-
12201239
return costFromArray(stats);
12211240
}
12221241
}

hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,11 @@ public void assertClusterAsBalanced(List<ServerAndLoad> servers) {
202202
int max = numRegions % numServers == 0 ? min : min + 1;
203203

204204
for (ServerAndLoad server : servers) {
205-
assertTrue(server.getLoad() >= 0);
206-
assertTrue(server.getLoad() <= max);
207-
assertTrue(server.getLoad() >= min);
205+
assertTrue("All servers should have a positive load. " + server, server.getLoad() >= 0);
206+
assertTrue("All servers should have load no more than " + max + ". " + server,
207+
server.getLoad() <= max);
208+
assertTrue("All servers should have load no less than " + min + ". " + server,
209+
server.getLoad() >= min);
208210
}
209211
}
210212

@@ -434,7 +436,7 @@ protected void testWithCluster(Map<ServerName, List<HRegionInfo>> serverMap,
434436
loadBalancer.setRackManager(rackManager);
435437
// Run the balancer.
436438
List<RegionPlan> plans = loadBalancer.balanceCluster(serverMap);
437-
assertNotNull(plans);
439+
assertNotNull("Initial cluster balance should produce plans.", plans);
438440

439441
// Check to see that this actually got to a stable place.
440442
if (assertFullyBalanced || assertFullyBalancedForReplicas) {
@@ -447,7 +449,8 @@ protected void testWithCluster(Map<ServerName, List<HRegionInfo>> serverMap,
447449
if (assertFullyBalanced) {
448450
assertClusterAsBalanced(balancedCluster);
449451
List<RegionPlan> secondPlans = loadBalancer.balanceCluster(serverMap);
450-
assertNull(secondPlans);
452+
assertNull("Given a requirement to be fully balanced, second attempt at plans should " +
453+
"produce none.", secondPlans);
451454
}
452455

453456
if (assertFullyBalancedForReplicas) {

0 commit comments

Comments
 (0)