Skip to content

HBASE-26618 Involving primary meta region in meta scan with CatalogRe… #4321

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@
* balancing algorithm. It maintains a stale location cache for each table. Whenever client looks
* up location, it first checks if the row is in the stale location cache. If yes, the location from
* catalog replica is stale, it will go to the primary region to look up the up-to-date location;
* otherwise, it will randomly pick up a replica region for lookup. When clients receive
* RegionNotServedException from region servers, it will add these region locations to the stale
* location cache. The stale cache will be cleaned up periodically by a chore.</p>
* otherwise, it will randomly pick up a replica region or primary region for lookup. When clients
* receive RegionNotServedException from region servers, it will add these region locations to the
* stale location cache. The stale cache will be cleaned up periodically by a chore.</p>
*
* It follows a simple algorithm to choose a replica to go:
* It follows a simple algorithm to choose a meta replica region (including primary meta) to go:
*
* <ol>
* <li>If there is no stale location entry for rows it looks up, it will randomly
* pick a replica region to do lookup. </li>
* pick a meta replica region (including primary meta) to do lookup. </li>
* <li>If the location from the replica region is stale, client gets RegionNotServedException
* from region server, in this case, it will create StaleLocationCacheEntry in
* CatalogReplicaLoadBalanceReplicaSimpleSelector.</li>
Expand Down Expand Up @@ -139,7 +139,7 @@ public void onError(HRegionLocation loc) {
}

/**
* Select a random replica id. In case there is no replica region configured, return
* Select a random replica id (including the primary replica id). In case there is no replica region configured, return
* the primary replica id.
* @return Replica id
*/
Expand All @@ -153,7 +153,7 @@ private int getRandomReplicaId() {
if (cachedNumOfReplicas <= 1) {
return RegionInfo.DEFAULT_REPLICA_ID;
}
return 1 + ThreadLocalRandom.current().nextInt(cachedNumOfReplicas - 1);
return ThreadLocalRandom.current().nextInt(cachedNumOfReplicas);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import java.io.IOException;
import java.util.concurrent.TimeUnit;
import java.util.stream.IntStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
Expand Down Expand Up @@ -105,9 +106,13 @@ public void testMetaChangeFromReplicaNoReplica() throws IOException, Interrupted
return numOfReplicas;
});

assertNotEquals(
metaSelector.select(TableName.valueOf("test"), EMPTY_START_ROW, RegionLocateType.CURRENT),
RegionReplicaUtil.DEFAULT_REPLICA_ID);
// Loop 99 times (IntStream.range(1, 100) excludes the upper bound); it should cover all replica ids.
int[] replicaIdCount = new int[numOfMetaReplica];
IntStream.range(1, 100).forEach(i -> replicaIdCount[metaSelector.select(
TableName.valueOf("test"), EMPTY_START_ROW, RegionLocateType.CURRENT)] ++);

// Make sure each replica id is returned by select() call, including primary replica id.
IntStream.range(0, numOfMetaReplica).forEach(i -> assertNotEquals(replicaIdCount[i], 0));

// Change to No meta replica
HBaseTestingUtil.setReplicas(admin, TableName.META_TABLE_NAME, 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,16 @@ private void primaryIncreaseReplicaNoChange(final long[] before, final long[] af
}
}

/**
 * Asserts that the read request counter grew for every meta replica, the primary included.
 * @param before per-replica read request counters sampled before the operation under test
 * @param after per-replica read request counters sampled after the operation under test
 */
private void primaryIncreaseReplicaIncrease(final long[] before, final long[] after) {
  // The primary meta replica must have served additional read requests.
  final int primary = RegionInfo.DEFAULT_REPLICA_ID;
  assertTrue(before[primary] < after[primary]);

  // Each remaining slot (index 1 onwards) must have served additional read requests too.
  int replica = 1;
  while (replica < after.length) {
    assertTrue(before[replica] < after[replica]);
    replica++;
  }
}

private void getMetaReplicaReadRequests(final Region[] metaRegions, final long[] counters) {
int i = 0;
for (Region r : metaRegions) {
Expand Down Expand Up @@ -455,9 +465,8 @@ public void testHBaseMetaReplicaGets() throws Exception {

getMetaReplicaReadRequests(metaRegions, readReqsForMetaReplicasAfterGet);

// There is no read requests increase for primary meta replica.
// For rest of meta replicas, there are more reads against them.
primaryNoChangeReplicaIncrease(readReqsForMetaReplicas, readReqsForMetaReplicasAfterGet);
// There are more reads against all meta replica regions, including the primary region.
primaryIncreaseReplicaIncrease(readReqsForMetaReplicas, readReqsForMetaReplicasAfterGet);

// move one of the regions so its meta cache may be invalid.
HTU.moveRegionAndWait(userRegion.getRegionInfo(), destRs.getServerName());
Expand Down