Skip to content

Commit 757173f

Browse files
author
Ray Mattingly
committed
fix cost updating bug
1 parent 421f571 commit 757173f

File tree

3 files changed

+79
-57
lines changed

3 files changed

+79
-57
lines changed

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@ boolean shouldSkipSloppyServerEvaluation() {
105105
return isConditionalBalancingEnabled();
106106
}
107107

108+
boolean isConditionalBalancingEnabled() {
109+
return !conditionalClasses.isEmpty();
110+
}
111+
108112
void clearConditionalWeightCaches() {
109113
conditionals.stream().map(RegionPlanConditional::getCandidateGenerator)
110114
.flatMap(Optional::stream).forEach(RegionPlanConditionalCandidateGenerator::clearWeightCache);
@@ -180,10 +184,6 @@ private RegionPlanConditional createConditional(Class<? extends RegionPlanCondit
180184
return null;
181185
}
182186

183-
private boolean isConditionalBalancingEnabled() {
184-
return !conditionalClasses.isEmpty();
185-
}
186-
187187
@Override
188188
public void setConf(Configuration conf) {
189189
this.conf = conf;

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CandidateGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
@InterfaceAudience.Private
2929
abstract class CandidateGenerator {
3030

31-
static double MAX_WEIGHT = 100_000;
31+
static double MAX_WEIGHT = 1.0;
3232

3333
abstract BalanceAction generate(BalancerClusterState cluster);
3434

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java

Lines changed: 74 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,20 @@
1717
*/
1818
package org.apache.hadoop.hbase.master.balancer;
1919

20+
import com.google.common.base.Suppliers;
2021
import com.google.errorprone.annotations.RestrictedApi;
2122
import java.lang.reflect.Constructor;
2223
import java.util.ArrayDeque;
2324
import java.util.ArrayList;
2425
import java.util.Arrays;
26+
import java.util.Collections;
2527
import java.util.Deque;
2628
import java.util.HashMap;
2729
import java.util.List;
2830
import java.util.Map;
2931
import java.util.Optional;
3032
import java.util.concurrent.ThreadLocalRandom;
33+
import java.util.concurrent.TimeUnit;
3134
import java.util.function.Supplier;
3235
import org.apache.hadoop.conf.Configuration;
3336
import org.apache.hadoop.hbase.ClusterMetrics;
@@ -159,6 +162,13 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
159162

160163
protected Map<Class<? extends CandidateGenerator>, CandidateGenerator> candidateGenerators;
161164
private Map<Class<? extends CandidateGenerator>, Double> weightsOfGenerators;
165+
private final Supplier<List<Class<? extends CandidateGenerator>>> shuffledGeneratorClasses =
166+
Suppliers.memoizeWithExpiration(() -> {
167+
List<Class<? extends CandidateGenerator>> shuffled =
168+
new ArrayList<>(candidateGenerators.keySet());
169+
Collections.shuffle(shuffled);
170+
return shuffled;
171+
}, 5, TimeUnit.SECONDS);
162172

163173
private final BalancerConditionals balancerConditionals = BalancerConditionals.INSTANCE;
164174

@@ -435,47 +445,50 @@ Pair<CandidateGenerator, BalanceAction> nextAction(BalancerClusterState cluster)
435445
* all cost functions that benefit from it.
436446
*/
437447
protected CandidateGenerator getRandomGenerator(BalancerClusterState cluster) {
438-
double sum = 0;
439-
for (Class<? extends CandidateGenerator> clazz : candidateGenerators.keySet()) {
440-
sum += weightsOfGenerators.getOrDefault(clazz, 0.0);
441-
}
442-
if (sum == 0) {
443-
return candidateGenerators.values().stream().findAny().orElseThrow();
448+
// Prefer conditional generators if they have moves to make
449+
if (balancerConditionals.isConditionalBalancingEnabled()) {
450+
for (RegionPlanConditional conditional : balancerConditionals.getConditionals()) {
451+
Optional<RegionPlanConditionalCandidateGenerator> generator =
452+
conditional.getCandidateGenerator();
453+
if (generator.isPresent() && generator.get().getWeight(cluster) > 0) {
454+
return generator.get();
455+
}
456+
}
444457
}
445458

446-
for (Class<? extends CandidateGenerator> clazz : candidateGenerators.keySet()) {
447-
weightsOfGenerators.put(clazz,
448-
Math.min(CandidateGenerator.MAX_WEIGHT, weightsOfGenerators.get(clazz) / sum));
459+
List<Class<? extends CandidateGenerator>> generatorClasses = shuffledGeneratorClasses.get();
460+
List<Double> partialSums = new ArrayList<>(generatorClasses.size());
461+
double sum = 0.0;
462+
for (Class<? extends CandidateGenerator> clazz : generatorClasses) {
463+
double weight = weightsOfGenerators.getOrDefault(clazz, 0.0);
464+
sum += weight;
465+
partialSums.add(sum);
449466
}
450467

451-
for (RegionPlanConditional conditional : balancerConditionals.getConditionals()) {
452-
Optional<RegionPlanConditionalCandidateGenerator> generator =
453-
conditional.getCandidateGenerator();
454-
if (generator.isPresent() && generator.get().getWeight(cluster) > 0) {
455-
return generator.get();
468+
// If the sum of all weights is zero, fall back to a default (e.g., first in the list)
469+
if (sum == 0.0) {
470+
// If no generators at all, fail fast or throw
471+
if (generatorClasses.isEmpty()) {
472+
throw new IllegalStateException("No candidate generators available");
456473
}
474+
return candidateGenerators.get(generatorClasses.get(0));
475+
}
476+
477+
// Normalize partial sums so that the last one should be exactly 1.0
478+
for (int i = 0; i < partialSums.size(); i++) {
479+
partialSums.set(i, partialSums.get(i) / sum);
457480
}
458481

482+
// Generate a random number and pick the first generator whose partial sum is >= rand
459483
double rand = ThreadLocalRandom.current().nextDouble();
460-
CandidateGenerator greatestWeightGenerator = null;
461-
double greatestWeight = 0;
462-
for (Map.Entry<Class<? extends CandidateGenerator>,
463-
CandidateGenerator> entry : candidateGenerators.entrySet()) {
464-
Class<? extends CandidateGenerator> clazz = entry.getKey();
465-
double generatorWeight = weightsOfGenerators.get(clazz);
466-
if (generatorWeight > greatestWeight) {
467-
greatestWeight = generatorWeight;
468-
greatestWeightGenerator = entry.getValue();
469-
}
470-
if (rand <= generatorWeight) {
471-
return entry.getValue();
484+
for (int i = 0; i < partialSums.size(); i++) {
485+
if (rand <= partialSums.get(i)) {
486+
return candidateGenerators.get(generatorClasses.get(i));
472487
}
473488
}
474489

475-
if (greatestWeightGenerator != null) {
476-
return greatestWeightGenerator;
477-
}
478-
return candidateGenerators.values().stream().findAny().orElseThrow();
490+
// Fallback: if for some reason we didn't return above, return the last generator
491+
return candidateGenerators.get(generatorClasses.get(generatorClasses.size() - 1));
479492
}
480493

481494
@RestrictedApi(explanation = "Should only be called in tests", link = "",
@@ -565,7 +578,7 @@ protected List<RegionPlan> balanceTable(TableName tableName,
565578
// Perform a stochastic walk to see if we can get a good fit.
566579
long step;
567580

568-
boolean improvedConditionals = false;
581+
boolean planImprovedConditionals = false;
569582
Map<Class<? extends CandidateGenerator>, Long> generatorToStepCount = new HashMap<>();
570583
Map<Class<? extends CandidateGenerator>, Long> generatorToApprovedActionCount = new HashMap<>();
571584
for (step = 0; step < computedMaxSteps; step++) {
@@ -576,41 +589,50 @@ protected List<RegionPlan> balanceTable(TableName tableName,
576589
if (action.getType() == BalanceAction.Type.NULL) {
577590
continue;
578591
}
592+
579593
generatorToStepCount.merge(generator.getClass(), action.getStepCount(), Long::sum);
580-
step += action.getStepCount() - 1;
581-
582-
// Always accept a conditional generator output. Sometimes conditional generators
583-
// may need to make controversial moves in order to break what would otherwise
584-
// be a deadlocked situation.
585-
// Otherwise, for normal moves, evaluate the action.
586-
int conditionalViolationsChange;
587-
boolean isViolating = false;
588-
if (RegionPlanConditionalCandidateGenerator.class.isAssignableFrom(generator.getClass())) {
589-
conditionalViolationsChange = -1;
590-
} else {
591-
conditionalViolationsChange = balancerConditionals.getViolationCountChange(cluster, action);
592-
isViolating = balancerConditionals.isViolating(cluster, action);
594+
long additionalSteps = action.getStepCount() - 1;
595+
if (additionalSteps > 0) {
596+
step += additionalSteps;
597+
}
598+
599+
int conditionalViolationsChange = 0;
600+
boolean isViolatingConditionals = false;
601+
boolean moveImprovedConditionals = false;
602+
// Only check conditionals if they are enabled
603+
if (balancerConditionals.isConditionalBalancingEnabled()) {
604+
// Always accept a conditional generator output. Sometimes conditional generators
605+
// may need to make controversial moves in order to break what would otherwise
606+
// be a deadlocked situation.
607+
// Otherwise, for normal moves, evaluate the action.
608+
if (RegionPlanConditionalCandidateGenerator.class.isAssignableFrom(generator.getClass())) {
609+
conditionalViolationsChange = -1;
610+
} else {
611+
conditionalViolationsChange =
612+
balancerConditionals.getViolationCountChange(cluster, action);
613+
isViolatingConditionals = balancerConditionals.isViolating(cluster, action);
614+
}
615+
moveImprovedConditionals = conditionalViolationsChange < 0;
616+
if (moveImprovedConditionals) {
617+
planImprovedConditionals = true;
618+
}
593619
}
594620

595621
// Change state and evaluate costs
596622
cluster.doAction(action);
623+
updateCostsAndWeightsWithAction(cluster, action);
597624
newCost = computeCost(cluster, currentCost);
598625

599-
boolean conditionalsImproved = conditionalViolationsChange < 0;
600-
if (conditionalsImproved) {
601-
improvedConditionals = true;
602-
}
603626
boolean conditionalsSimilarCostsImproved =
604-
(newCost < currentCost && conditionalViolationsChange == 0 && !isViolating);
627+
(newCost < currentCost && conditionalViolationsChange == 0 && !isViolatingConditionals);
605628
// Our first priority is to reduce conditional violations
606629
// Our second priority is to reduce balancer cost
607630
// change, regardless of cost change
608-
if (conditionalsImproved || conditionalsSimilarCostsImproved) {
631+
if (moveImprovedConditionals || conditionalsSimilarCostsImproved) {
609632
currentCost = newCost;
610633
generatorToApprovedActionCount.merge(generator.getClass(), action.getStepCount(),
611634
Long::sum);
612635
balancerConditionals.loadClusterState(cluster);
613-
updateCostsAndWeightsWithAction(cluster, action);
614636

615637
// save for JMX
616638
curOverallCost = currentCost;
@@ -641,7 +663,7 @@ protected List<RegionPlan> balanceTable(TableName tableName,
641663

642664
metricsBalancer.balanceCluster(endTime - startTime);
643665

644-
if (improvedConditionals || initCost > currentCost) {
666+
if (planImprovedConditionals || initCost > currentCost) {
645667
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
646668
List<RegionPlan> plans = createRegionPlans(cluster);
647669
LOG.info(

0 commit comments

Comments
 (0)