Skip to content

Commit f9abe3d

Browse files
authored
HBASE-26618 Involving primary meta region in meta scan with CatalogRe… (#4321) (#4326)
Signed-off-by: Michael Stack <stack@apache.org>
1 parent d001fb0 commit f9abe3d

File tree

3 files changed

+27
-13
lines changed

3 files changed

+27
-13
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/client/CatalogReplicaLoadBalanceSimpleSelector.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,15 @@
4646
* balancing algorithm. It maintains a stale location cache for each table. Whenever client looks
4747
* up location, it first checks if the row is in the stale location cache. If yes, the location from
4848
* catalog replica is stale, it will go to the primary region to look up up-to-date location;
49-
* otherwise, it will randomly pick up a replica region for lookup. When clients receive
50-
* RegionNotServedException from region servers, it will add these region locations to the stale
51-
* location cache. The stale cache will be cleaned up periodically by a chore.</p>
49+
* otherwise, it will randomly pick up a replica region or primary region for lookup. When clients
50+
* receive RegionNotServedException from region servers, it will add these region locations to the
51+
* stale location cache. The stale cache will be cleaned up periodically by a chore.</p>
5252
*
53-
* It follows a simple algorithm to choose a replica to go:
53+
* It follows a simple algorithm to choose a meta replica region (including primary meta) to go:
5454
*
5555
* <ol>
5656
* <li>If there is no stale location entry for rows it looks up, it will randomly
57-
* pick a replica region to do lookup. </li>
57+
* pick a meta replica region (including primary meta) to do lookup. </li>
5858
* <li>If the location from the replica region is stale, client gets RegionNotServedException
5959
* from region server, in this case, it will create StaleLocationCacheEntry in
6060
* CatalogReplicaLoadBalanceReplicaSimpleSelector.</li>
@@ -141,7 +141,7 @@ public void onError(HRegionLocation loc) {
141141
}
142142

143143
/**
144-
* Select an random replica id. In case there is no replica region configured, return
144+
* Select a random replica id (including the primary replica id). In case there is no replica region configured, return
145145
* the primary replica id.
146146
* @return Replica id
147147
*/
@@ -155,7 +155,7 @@ private int getRandomReplicaId() {
155155
if (cachedNumOfReplicas <= 1) {
156156
return RegionInfo.DEFAULT_REPLICA_ID;
157157
}
158-
return 1 + ThreadLocalRandom.current().nextInt(cachedNumOfReplicas - 1);
158+
return ThreadLocalRandom.current().nextInt(cachedNumOfReplicas);
159159
}
160160

161161
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCatalogReplicaLoadBalanceSimpleSelector.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
import java.io.IOException;
2626
import java.util.concurrent.TimeUnit;
27+
import java.util.stream.IntStream;
2728
import org.apache.hadoop.conf.Configuration;
2829
import org.apache.hadoop.hbase.HBaseClassTestRule;
2930
import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -105,9 +106,13 @@ public void testMetaChangeFromReplicaNoReplica() throws IOException, Interrupted
105106
return numOfReplicas;
106107
});
107108

108-
assertNotEquals(
109-
metaSelector.select(TableName.valueOf("test"), EMPTY_START_ROW, RegionLocateType.CURRENT),
110-
RegionReplicaUtil.DEFAULT_REPLICA_ID);
109+
// Call select() 99 times; the random selection should cover all replica ids.
110+
int[] replicaIdCount = new int[numOfMetaReplica];
111+
IntStream.range(1, 100).forEach(i -> replicaIdCount[metaSelector.select(
112+
TableName.valueOf("test"), EMPTY_START_ROW, RegionLocateType.CURRENT)] ++);
113+
114+
// Make sure each replica id is returned by select() call, including primary replica id.
115+
IntStream.range(0, numOfMetaReplica).forEach(i -> assertNotEquals(replicaIdCount[i], 0));
111116

112117
// Change to No meta replica
113118
HBaseTestingUtility.setReplicas(admin, TableName.META_TABLE_NAME, 1);

hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestMetaRegionReplicaReplicationEndpoint.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,16 @@ private void primaryMayIncreaseReplicaNoChange(final long[] before, final long[]
519519
}
520520
}
521521

522+
private void primaryIncreaseReplicaIncrease(final long[] before, final long[] after) {
523+
// Read requests increase for the primary meta replica.
524+
assertTrue(after[RegionInfo.DEFAULT_REPLICA_ID] > before[RegionInfo.DEFAULT_REPLICA_ID]);
525+
526+
// Read requests increase for the meta replica regions.
527+
for (int i = 1; i < after.length; i++) {
528+
assertTrue(after[i] > before[i]);
529+
}
530+
}
531+
522532
private void getMetaReplicaReadRequests(final Region[] metaRegions, final long[] counters) {
523533
int i = 0;
524534
for (Region r : metaRegions) {
@@ -579,9 +589,8 @@ public void testHBaseMetaReplicaGets() throws Exception {
579589

580590
getMetaReplicaReadRequests(metaRegions, readReqsForMetaReplicasAfterGet);
581591

582-
// There is no read requests increase for primary meta replica.
583-
// For rest of meta replicas, there are more reads against them.
584-
primaryNoChangeReplicaIncrease(readReqsForMetaReplicas, readReqsForMetaReplicasAfterGet);
592+
// There are more reads against all meta replica regions, including the primary region.
593+
primaryIncreaseReplicaIncrease(readReqsForMetaReplicas, readReqsForMetaReplicasAfterGet);
585594

586595
// move one of regions so it meta cache may be invalid.
587596
HTU.moveRegionAndWait(userRegion.getRegionInfo(), destRs.getServerName());

0 commit comments

Comments
 (0)