Skip to content

Commit e1283f4

Browse files
author
Ray Mattingly
committed
Way more tests
1 parent aafaa1c commit e1283f4

15 files changed

+730
-301
lines changed

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.Collections;
2222
import java.util.List;
2323
import java.util.Set;
24+
import java.util.concurrent.atomic.AtomicInteger;
2425
import java.util.stream.Collectors;
2526
import org.apache.hadoop.conf.Configuration;
2627
import org.apache.hadoop.hbase.master.RegionPlan;
@@ -62,16 +63,46 @@ public class BalancerConditionals {
6263
public static final String ADDITIONAL_CONDITIONALS_KEY =
6364
"hbase.master.balancer.stochastic.additionalConditionals";
6465

66+
// when this count is low, we'll be more likely to trigger a subsequent balancer run
67+
private static final AtomicInteger BALANCE_COUNT_WITHOUT_IMPROVEMENTS = new AtomicInteger(0);
68+
private static final int BALANCE_COUNT_WITHOUT_IMPROVEMENTS_CEILING = 10;
69+
6570
private Set<Class<? extends RegionPlanConditional>> conditionalClasses = Collections.emptySet();
6671
private Set<RegionPlanConditional> conditionals = Collections.emptySet();
67-
68-
private int lastViolationCount = 0;
72+
private Configuration conf;
6973

7074
private BalancerConditionals() {
7175
}
7276

73-
protected int getLastViolationCount() {
74-
return lastViolationCount;
77+
protected boolean isTableIsolationEnabled() {
78+
return conditionalClasses.contains(SystemTableIsolationConditional.class)
79+
|| conditionalClasses.contains(MetaTableIsolationConditional.class);
80+
}
81+
82+
protected boolean shouldRunBalancer() {
83+
return BALANCE_COUNT_WITHOUT_IMPROVEMENTS.get() < BALANCE_COUNT_WITHOUT_IMPROVEMENTS_CEILING;
84+
}
85+
86+
protected int getConsecutiveBalancesWithoutImprovement() {
87+
return BALANCE_COUNT_WITHOUT_IMPROVEMENTS.get();
88+
}
89+
90+
protected void incConsecutiveBalancesWithoutImprovement() {
91+
if (BALANCE_COUNT_WITHOUT_IMPROVEMENTS.get() == Integer.MAX_VALUE) {
92+
return;
93+
}
94+
this.BALANCE_COUNT_WITHOUT_IMPROVEMENTS.getAndIncrement();
95+
LOG.trace("Set balanceCountWithoutImprovements={}",
96+
this.BALANCE_COUNT_WITHOUT_IMPROVEMENTS.get());
97+
}
98+
99+
protected void resetConsecutiveBalancesWithoutImprovement() {
100+
this.BALANCE_COUNT_WITHOUT_IMPROVEMENTS.set(0);
101+
LOG.trace("Set balanceCountWithoutImprovements=0");
102+
}
103+
104+
protected Set<Class<? extends RegionPlanConditional>> getConditionalClasses() {
105+
return Set.copyOf(conditionalClasses);
75106
}
76107

77108
protected boolean shouldSkipSloppyServerEvaluation() {
@@ -81,6 +112,7 @@ protected boolean shouldSkipSloppyServerEvaluation() {
81112
}
82113

83114
protected void loadConf(Configuration conf) {
115+
this.conf = conf;
84116
ImmutableSet.Builder<Class<? extends RegionPlanConditional>> conditionalClasses =
85117
ImmutableSet.builder();
86118

@@ -112,20 +144,21 @@ protected void loadConf(Configuration conf) {
112144
this.conditionalClasses = conditionalClasses.build();
113145
}
114146

115-
protected void loadConditionals(BalancerClusterState cluster) {
116-
conditionals = conditionalClasses.stream().map(clazz -> createConditional(clazz, cluster))
147+
protected void loadClusterState(BalancerClusterState cluster) {
148+
conditionals = conditionalClasses.stream().map(clazz -> createConditional(clazz, conf, cluster))
117149
.collect(Collectors.toSet());
118150
}
119151

120152
protected int getConditionalViolationChange(List<RegionPlan> regionPlans) {
121153
if (conditionals.isEmpty()) {
122-
lastViolationCount = 0;
154+
incConsecutiveBalancesWithoutImprovement();
123155
return 0;
124156
}
125-
int violations = regionPlans.stream()
126-
.mapToInt(regionPlan -> getConditionalViolationChange(conditionals, regionPlan)).sum();
127-
lastViolationCount = violations;
128-
return violations;
157+
int conditionalViolationChange = 0;
158+
for (RegionPlan regionPlan : regionPlans) {
159+
conditionalViolationChange += getConditionalViolationChange(conditionals, regionPlan);
160+
}
161+
return conditionalViolationChange;
129162
}
130163

131164
private static int getConditionalViolationChange(Set<RegionPlanConditional> conditionals,
@@ -135,7 +168,13 @@ private static int getConditionalViolationChange(Set<RegionPlanConditional> cond
135168
int currentConditionalViolationCount =
136169
getConditionalViolationCount(conditionals, inverseRegionPlan);
137170
int newConditionalViolationCount = getConditionalViolationCount(conditionals, regionPlan);
138-
return newConditionalViolationCount - currentConditionalViolationCount;
171+
int violationChange = newConditionalViolationCount - currentConditionalViolationCount;
172+
if (violationChange < 0) {
173+
LOG.trace("Should move region {}_{} from {} to {}", regionPlan.getRegionName(),
174+
regionPlan.getRegionInfo().getReplicaId(), regionPlan.getSource().getServerName(),
175+
regionPlan.getDestination().getServerName());
176+
}
177+
return violationChange;
139178
}
140179

141180
private static int getConditionalViolationCount(Set<RegionPlanConditional> conditionals,
@@ -150,13 +189,14 @@ private static int getConditionalViolationCount(Set<RegionPlanConditional> condi
150189
}
151190

152191
private RegionPlanConditional createConditional(Class<? extends RegionPlanConditional> clazz,
153-
BalancerClusterState cluster) {
192+
Configuration conf, BalancerClusterState cluster) {
154193
try {
155194
Constructor<? extends RegionPlanConditional> ctor =
156-
clazz.getDeclaredConstructor(BalancerClusterState.class);
157-
return ReflectionUtils.instantiate(clazz.getName(), ctor, cluster);
195+
clazz.getDeclaredConstructor(Configuration.class, BalancerClusterState.class);
196+
return ReflectionUtils.instantiate(clazz.getName(), ctor, conf, cluster);
158197
} catch (NoSuchMethodException e) {
159-
LOG.warn("Cannot find constructor with BalancerClusterState parameter for class '{}': {}",
198+
LOG.warn(
199+
"Cannot find constructor with Configuration and BalancerClusterState parameters for class '{}': {}",
160200
clazz.getName(), e.getMessage());
161201
}
162202
return null;

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasConditional.java

Lines changed: 65 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
*/
1818
package org.apache.hadoop.hbase.master.balancer;
1919

20+
import org.apache.hadoop.conf.Configuration;
21+
import org.apache.hadoop.hbase.client.RegionInfo;
22+
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
2023
import org.apache.hadoop.hbase.master.RegionPlan;
2124
import org.slf4j.Logger;
2225
import org.slf4j.LoggerFactory;
@@ -28,12 +31,19 @@
2831
*/
2932
public class DistributeReplicasConditional extends RegionPlanConditional {
3033

34+
/**
35+
* Local mini cluster tests can only one on one server/rack by design. If enabled, this will
36+
* pretend that localhost RegionServer threads are actually running on separate hosts/racks. This
37+
* should only be used in unit tests.
38+
*/
39+
public static boolean TEST_MODE_ENABLED = false;
40+
3141
private static final Logger LOG = LoggerFactory.getLogger(DistributeReplicasConditional.class);
3242

3343
private final BalancerClusterState cluster;
3444

35-
public DistributeReplicasConditional(BalancerClusterState cluster) {
36-
super(cluster);
45+
public DistributeReplicasConditional(Configuration conf, BalancerClusterState cluster) {
46+
super(conf, cluster);
3747
this.cluster = cluster;
3848
}
3949

@@ -63,15 +73,17 @@ boolean isViolating(RegionPlan regionPlan) {
6373
}
6474

6575
if (
66-
checkViolation(destinationServerIndex, cluster.serversPerHost, cluster.serverIndexToHostIndex,
67-
cluster.regionsPerServer, primaryRegionIndex, cluster.regionIndexToPrimaryIndex, "host")
76+
checkViolation(cluster.regions, regionPlan.getRegionInfo(), destinationServerIndex,
77+
cluster.serversPerHost, cluster.serverIndexToHostIndex, cluster.regionsPerServer,
78+
primaryRegionIndex, "host")
6879
) {
6980
return true;
7081
}
7182

7283
if (
73-
checkViolation(destinationServerIndex, cluster.serversPerRack, cluster.serverIndexToRackIndex,
74-
cluster.regionsPerServer, primaryRegionIndex, cluster.regionIndexToPrimaryIndex, "rack")
84+
checkViolation(cluster.regions, regionPlan.getRegionInfo(), destinationServerIndex,
85+
cluster.serversPerRack, cluster.serverIndexToRackIndex, cluster.regionsPerServer,
86+
primaryRegionIndex, "rack")
7587
) {
7688
return true;
7789
}
@@ -89,23 +101,61 @@ boolean isViolating(RegionPlan regionPlan) {
89101
* @param locationType Type of location being checked ("Host" or "Rack").
90102
* @return True if a violation is found, false otherwise.
91103
*/
92-
static boolean checkViolation(int destinationServerIndex, int[][] serversPerLocation,
93-
int[] serverToLocationIndex, int[][] regionsPerServer, int primaryRegionIndex,
94-
int[] regionIndexToPrimaryIndex, String locationType) {
95-
if (serversPerLocation == null || serversPerLocation.length <= 1) {
96-
LOG.debug("{} violation check skipped: serversPerLocation is null or has <= 1 location",
97-
locationType);
104+
static boolean checkViolation(RegionInfo[] regions, RegionInfo regionToBeMoved,
105+
int destinationServerIndex, int[][] serversPerLocation, int[] serverToLocationIndex,
106+
int[][] regionsPerServer, int primaryRegionIndex, String locationType) {
107+
108+
if (TEST_MODE_ENABLED) {
109+
// Take the flat serversPerLocation, like {0: [0, 1, 2, 3, 4]}
110+
// and pretend it is multi-location, like {0: [1], 1: [2] ...}
111+
int numServers = serversPerLocation[0].length;
112+
// Create a new serversPerLocation array where each server gets its own "location"
113+
int[][] simulatedServersPerLocation = new int[numServers][];
114+
for (int i = 0; i < numServers; i++) {
115+
simulatedServersPerLocation[i] = new int[] { serversPerLocation[0][i] };
116+
}
117+
// Adjust serverToLocationIndex to map each server to its simulated location
118+
int[] simulatedServerToLocationIndex = new int[numServers];
119+
for (int i = 0; i < numServers; i++) {
120+
simulatedServerToLocationIndex[serversPerLocation[0][i]] = i;
121+
}
122+
LOG.trace("Test mode enabled: Simulated {} locations for servers.", numServers);
123+
// Use the simulated arrays for test mode
124+
serversPerLocation = simulatedServersPerLocation;
125+
serverToLocationIndex = simulatedServerToLocationIndex;
126+
}
127+
128+
if (serversPerLocation == null) {
129+
LOG.trace("{} violation check skipped: serversPerLocation is null", locationType);
98130
return false;
99131
}
100132

133+
if (serversPerLocation.length == 1) {
134+
LOG.warn(
135+
"{} violation inevitable: serversPerLocation has only 1 entry. You probably should not be using read replicas.",
136+
locationType);
137+
return true;
138+
}
139+
101140
int destinationLocationIndex = serverToLocationIndex[destinationServerIndex];
102-
LOG.debug("Checking {} violations for destination server index {} at location index {}",
141+
LOG.trace("Checking {} violations for destination server index {} at location index {}",
103142
locationType, destinationServerIndex, destinationLocationIndex);
104143

144+
// For every RegionServer on host/rack
105145
for (int serverIndex : serversPerLocation[destinationLocationIndex]) {
146+
// For every Region on RegionServer
106147
for (int hostedRegion : regionsPerServer[serverIndex]) {
107-
if (regionIndexToPrimaryIndex[hostedRegion] == primaryRegionIndex) {
108-
LOG.debug("{} violation detected: region {} on {} {}", locationType, primaryRegionIndex,
148+
RegionInfo targetRegion = regions[hostedRegion];
149+
if (targetRegion.getEncodedName().equals(regionToBeMoved.getEncodedName())) {
150+
// The balancer state will already show this region as having moved.
151+
// A region's replicas will also have unique encoded names.
152+
// So we should skip this check if the encoded name is the same.
153+
continue;
154+
}
155+
boolean isReplicaForSameRegion =
156+
RegionReplicaUtil.isReplicasForSameRegion(targetRegion, regionToBeMoved);
157+
if (isReplicaForSameRegion) {
158+
LOG.trace("{} violation detected: region {} on {} {}", locationType, primaryRegionIndex,
109159
locationType, destinationLocationIndex);
110160
return true;
111161
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.master.balancer;
19+
20+
import org.apache.hadoop.hbase.client.RegionInfo;
21+
22+
public class MetaTableIsolationCandidateGenerator extends TableIsolationCandidateGenerator {
23+
@Override
24+
boolean shouldBeIsolated(RegionInfo regionInfo) {
25+
return regionInfo.isMetaRegion();
26+
}
27+
}

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/MetaTableIsolationConditional.java

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import java.util.HashSet;
2121
import java.util.Set;
22+
import org.apache.hadoop.conf.Configuration;
2223
import org.apache.hadoop.hbase.ServerName;
2324
import org.apache.hadoop.hbase.TableName;
2425
import org.apache.hadoop.hbase.client.RegionInfo;
@@ -30,13 +31,11 @@
3031
*/
3132
class MetaTableIsolationConditional extends RegionPlanConditional {
3233

33-
private final Set<ServerName> emptyServers;
34-
private final Set<ServerName> serversHostingMeta;
34+
private final Set<ServerName> emptyServers = new HashSet<>();
35+
private final Set<ServerName> serversHostingMeta = new HashSet<>();
3536

36-
public MetaTableIsolationConditional(BalancerClusterState cluster) {
37-
super(cluster);
38-
this.emptyServers = new HashSet<>();
39-
this.serversHostingMeta = new HashSet<>();
37+
public MetaTableIsolationConditional(Configuration conf, BalancerClusterState cluster) {
38+
super(conf, cluster);
4039

4140
for (int i = 0; i < cluster.servers.length; i++) {
4241
ServerName server = cluster.servers[i];
@@ -76,7 +75,6 @@ protected static boolean checkViolation(RegionPlan regionPlan, Set<ServerName> s
7675
Set<ServerName> emptyServers) {
7776
boolean isMeta = regionPlan.getRegionInfo().getTable().equals(TableName.META_TABLE_NAME);
7877
ServerName destination = regionPlan.getDestination();
79-
8078
if (isMeta) {
8179
// meta must go to an empty server or a server already hosting meta
8280
return !(serversHostingMeta.contains(destination) || emptyServers.contains(destination));

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionPlanConditional.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717
*/
1818
package org.apache.hadoop.hbase.master.balancer;
1919

20+
import org.apache.hadoop.conf.Configuration;
2021
import org.apache.hadoop.hbase.master.RegionPlan;
2122

2223
public abstract class RegionPlanConditional {
23-
public RegionPlanConditional(BalancerClusterState cluster) {
24+
public RegionPlanConditional(Configuration conf, BalancerClusterState cluster) {
2425
}
2526

2627
abstract boolean isViolating(RegionPlan regionPlan);

0 commit comments

Comments
 (0)