Skip to content

Commit 917bfce

Browse files
authored
Merge branch 'HBASE-27389' into HBASE-27389-master-rebase
2 parents fa4c896 + 69d980a commit 917bfce

File tree

27 files changed

+1731
-128
lines changed

27 files changed

+1731
-128
lines changed

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java

Lines changed: 154 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
3535
import org.apache.hadoop.hbase.master.RackManager;
3636
import org.apache.hadoop.hbase.net.Address;
37+
import org.apache.hadoop.hbase.util.Pair;
3738
import org.apache.yetus.audience.InterfaceAudience;
3839
import org.slf4j.Logger;
3940
import org.slf4j.LoggerFactory;
@@ -114,6 +115,12 @@ class BalancerClusterState {
114115
private float[][] rackLocalities;
115116
// Maps localityType -> region -> [server|rack]Index with highest locality
116117
private int[][] regionsToMostLocalEntities;
118+
// Maps region -> serverIndex -> regionCacheRatio of a region on a server
119+
private Map<Pair<Integer, Integer>, Float> regionIndexServerIndexRegionCachedRatio;
120+
// Maps regionIndex -> serverIndex with best region cache ratio
121+
private int[] regionServerIndexWithBestRegionCachedRatio;
122+
// Maps regionName -> oldServerName -> cache ratio of the region on the old server
123+
Map<String, Pair<ServerName, Float>> regionCacheRatioOnOldServerMap;
117124

118125
static class DefaultRackManager extends RackManager {
119126
@Override
@@ -125,13 +132,20 @@ public String getRack(ServerName server) {
125132
BalancerClusterState(Map<ServerName, List<RegionInfo>> clusterState,
126133
Map<String, Deque<BalancerRegionLoad>> loads, RegionHDFSBlockLocationFinder regionFinder,
127134
RackManager rackManager) {
128-
this(null, clusterState, loads, regionFinder, rackManager);
135+
this(null, clusterState, loads, regionFinder, rackManager, null);
136+
}
137+
138+
protected BalancerClusterState(Map<ServerName, List<RegionInfo>> clusterState,
139+
Map<String, Deque<BalancerRegionLoad>> loads, RegionHDFSBlockLocationFinder regionFinder,
140+
RackManager rackManager, Map<String, Pair<ServerName, Float>> oldRegionServerRegionCacheRatio) {
141+
this(null, clusterState, loads, regionFinder, rackManager, oldRegionServerRegionCacheRatio);
129142
}
130143

131144
@SuppressWarnings("unchecked")
132145
BalancerClusterState(Collection<RegionInfo> unassignedRegions,
133146
Map<ServerName, List<RegionInfo>> clusterState, Map<String, Deque<BalancerRegionLoad>> loads,
134-
RegionHDFSBlockLocationFinder regionFinder, RackManager rackManager) {
147+
RegionHDFSBlockLocationFinder regionFinder, RackManager rackManager,
148+
Map<String, Pair<ServerName, Float>> oldRegionServerRegionCacheRatio) {
135149
if (unassignedRegions == null) {
136150
unassignedRegions = Collections.emptyList();
137151
}
@@ -145,6 +159,8 @@ public String getRack(ServerName server) {
145159
tables = new ArrayList<>();
146160
this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();
147161

162+
this.regionCacheRatioOnOldServerMap = oldRegionServerRegionCacheRatio;
163+
148164
numRegions = 0;
149165

150166
List<List<Integer>> serversPerHostList = new ArrayList<>();
@@ -541,6 +557,142 @@ private void computeCachedLocalities() {
541557

542558
}
543559

560+
/**
561+
* Returns the size of hFiles from the most recent RegionLoad for region
562+
*/
563+
public int getTotalRegionHFileSizeMB(int region) {
564+
Deque<BalancerRegionLoad> load = regionLoads[region];
565+
if (load == null) {
566+
// This means, that the region has no actual data on disk
567+
return 0;
568+
}
569+
return regionLoads[region].getLast().getRegionSizeMB();
570+
}
571+
572+
/**
573+
* Returns the weighted cache ratio of a region on the given region server
574+
*/
575+
public float getOrComputeWeightedRegionCacheRatio(int region, int server) {
576+
return getTotalRegionHFileSizeMB(region) * getOrComputeRegionCacheRatio(region, server);
577+
}
578+
579+
/**
580+
* Returns the amount by which a region is cached on a given region server. If the region is not
581+
* currently hosted on the given region server, then find out if it was previously hosted there
582+
* and return the old cache ratio.
583+
*/
584+
protected float getRegionCacheRatioOnRegionServer(int region, int regionServerIndex) {
585+
float regionCacheRatio = 0.0f;
586+
587+
// Get the current region cache ratio if the region is hosted on the server regionServerIndex
588+
for (int regionIndex : regionsPerServer[regionServerIndex]) {
589+
if (region != regionIndex) {
590+
continue;
591+
}
592+
593+
Deque<BalancerRegionLoad> regionLoadList = regionLoads[regionIndex];
594+
595+
// The region is currently hosted on this region server. Get the region cache ratio for this
596+
// region on this server
597+
regionCacheRatio =
598+
regionLoadList == null ? 0.0f : regionLoadList.getLast().getCurrentRegionCacheRatio();
599+
600+
return regionCacheRatio;
601+
}
602+
603+
// Region is not currently hosted on this server. Check if the region was cached on this
604+
// server earlier. This can happen when the server was shutdown and the cache was persisted.
605+
// Search using the region name and server name and not the index id and server id as these ids
606+
// may change when a server is marked as dead or a new server is added.
607+
String regionEncodedName = regions[region].getEncodedName();
608+
ServerName serverName = servers[regionServerIndex];
609+
if (
610+
regionCacheRatioOnOldServerMap != null
611+
&& regionCacheRatioOnOldServerMap.containsKey(regionEncodedName)
612+
) {
613+
Pair<ServerName, Float> cacheRatioOfRegionOnServer =
614+
regionCacheRatioOnOldServerMap.get(regionEncodedName);
615+
if (ServerName.isSameAddress(cacheRatioOfRegionOnServer.getFirst(), serverName)) {
616+
regionCacheRatio = cacheRatioOfRegionOnServer.getSecond();
617+
if (LOG.isDebugEnabled()) {
618+
LOG.debug("Old cache ratio found for region {} on server {}: {}", regionEncodedName,
619+
serverName, regionCacheRatio);
620+
}
621+
}
622+
}
623+
return regionCacheRatio;
624+
}
625+
626+
/**
627+
* Populate the maps containing information about how much a region is cached on a region server.
628+
*/
629+
private void computeRegionServerRegionCacheRatio() {
630+
regionIndexServerIndexRegionCachedRatio = new HashMap<>();
631+
regionServerIndexWithBestRegionCachedRatio = new int[numRegions];
632+
633+
for (int region = 0; region < numRegions; region++) {
634+
float bestRegionCacheRatio = 0.0f;
635+
int serverWithBestRegionCacheRatio = 0;
636+
for (int server = 0; server < numServers; server++) {
637+
float regionCacheRatio = getRegionCacheRatioOnRegionServer(region, server);
638+
if (regionCacheRatio > 0.0f || server == regionIndexToServerIndex[region]) {
639+
// A region with cache ratio 0 on a server means nothing. Hence, just make a note of
640+
// cache ratio only if the cache ratio is greater than 0.
641+
Pair<Integer, Integer> regionServerPair = new Pair<>(region, server);
642+
regionIndexServerIndexRegionCachedRatio.put(regionServerPair, regionCacheRatio);
643+
}
644+
if (regionCacheRatio > bestRegionCacheRatio) {
645+
serverWithBestRegionCacheRatio = server;
646+
// If the server currently hosting the region has equal cache ratio to a historical
647+
// server, consider the current server to keep hosting the region
648+
bestRegionCacheRatio = regionCacheRatio;
649+
} else if (
650+
regionCacheRatio == bestRegionCacheRatio && server == regionIndexToServerIndex[region]
651+
) {
652+
// If two servers have same region cache ratio, then the server currently hosting the
653+
// region
654+
// should retain the region
655+
serverWithBestRegionCacheRatio = server;
656+
}
657+
}
658+
regionServerIndexWithBestRegionCachedRatio[region] = serverWithBestRegionCacheRatio;
659+
Pair<Integer, Integer> regionServerPair =
660+
new Pair<>(region, regionIndexToServerIndex[region]);
661+
float tempRegionCacheRatio = regionIndexServerIndexRegionCachedRatio.get(regionServerPair);
662+
if (tempRegionCacheRatio > bestRegionCacheRatio) {
663+
LOG.warn(
664+
"INVALID CONDITION: region {} on server {} cache ratio {} is greater than the "
665+
+ "best region cache ratio {} on server {}",
666+
regions[region].getEncodedName(), servers[regionIndexToServerIndex[region]],
667+
tempRegionCacheRatio, bestRegionCacheRatio, servers[serverWithBestRegionCacheRatio]);
668+
}
669+
}
670+
}
671+
672+
protected float getOrComputeRegionCacheRatio(int region, int server) {
673+
if (
674+
regionServerIndexWithBestRegionCachedRatio == null
675+
|| regionIndexServerIndexRegionCachedRatio.isEmpty()
676+
) {
677+
computeRegionServerRegionCacheRatio();
678+
}
679+
680+
Pair<Integer, Integer> regionServerPair = new Pair<>(region, server);
681+
return regionIndexServerIndexRegionCachedRatio.containsKey(regionServerPair)
682+
? regionIndexServerIndexRegionCachedRatio.get(regionServerPair)
683+
: 0.0f;
684+
}
685+
686+
public int[] getOrComputeServerWithBestRegionCachedRatio() {
687+
if (
688+
regionServerIndexWithBestRegionCachedRatio == null
689+
|| regionIndexServerIndexRegionCachedRatio.isEmpty()
690+
) {
691+
computeRegionServerRegionCacheRatio();
692+
}
693+
return regionServerIndexWithBestRegionCachedRatio;
694+
}
695+
544696
/**
545697
* Maps region index to rack index
546698
*/

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,17 @@ class BalancerRegionLoad {
3434
private final long writeRequestsCount;
3535
private final int memStoreSizeMB;
3636
private final int storefileSizeMB;
37+
private final int regionSizeMB;
38+
private final float currentRegionPrefetchRatio;
3739

3840
BalancerRegionLoad(RegionMetrics regionMetrics) {
3941
readRequestsCount = regionMetrics.getReadRequestCount();
4042
cpRequestsCount = regionMetrics.getCpRequestCount();
4143
writeRequestsCount = regionMetrics.getWriteRequestCount();
4244
memStoreSizeMB = (int) regionMetrics.getMemStoreSize().get(Size.Unit.MEGABYTE);
4345
storefileSizeMB = (int) regionMetrics.getStoreFileSize().get(Size.Unit.MEGABYTE);
46+
regionSizeMB = (int) regionMetrics.getRegionSizeMB().get(Size.Unit.MEGABYTE);
47+
currentRegionPrefetchRatio = regionMetrics.getCurrentRegionCachedRatio();
4448
}
4549

4650
public long getReadRequestsCount() {
@@ -62,4 +66,12 @@ public int getMemStoreSizeMB() {
6266
public int getStorefileSizeMB() {
6367
return storefileSizeMB;
6468
}
69+
70+
public int getRegionSizeMB() {
71+
return regionSizeMB;
72+
}
73+
74+
public float getCurrentRegionCacheRatio() {
75+
return currentRegionPrefetchRatio;
76+
}
6577
}

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,8 @@ private BalancerClusterState createCluster(List<ServerName> servers,
232232
clusterState.put(server, Collections.emptyList());
233233
}
234234
}
235-
return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager);
235+
return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager,
236+
null);
236237
}
237238

238239
private List<ServerName> findIdleServers(List<ServerName> servers) {

0 commit comments

Comments
 (0)