Skip to content

Commit f20efaf

Browse files
authored
HBASE-27650 Merging empty regions corrupts meta cache (#5037)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
1 parent 4a9cf99 commit f20efaf

File tree

5 files changed

+472
-186
lines changed

5 files changed

+472
-186
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java

Lines changed: 44 additions & 185 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,12 @@
2323
import static org.apache.hadoop.hbase.HConstants.USE_META_REPLICAS;
2424
import static org.apache.hadoop.hbase.HConstants.ZEROES;
2525
import static org.apache.hadoop.hbase.TableName.META_TABLE_NAME;
26-
import static org.apache.hadoop.hbase.client.AsyncRegionLocatorHelper.canUpdateOnError;
2726
import static org.apache.hadoop.hbase.client.AsyncRegionLocatorHelper.createRegionLocations;
2827
import static org.apache.hadoop.hbase.client.AsyncRegionLocatorHelper.isGood;
29-
import static org.apache.hadoop.hbase.client.AsyncRegionLocatorHelper.removeRegionLocation;
3028
import static org.apache.hadoop.hbase.client.ConnectionUtils.createClosestRowAfter;
3129
import static org.apache.hadoop.hbase.client.ConnectionUtils.isEmptyStopRow;
3230
import static org.apache.hadoop.hbase.client.RegionInfo.createRegionName;
3331
import static org.apache.hadoop.hbase.client.RegionLocator.LOCATOR_META_REPLICAS_MODE;
34-
import static org.apache.hadoop.hbase.util.Bytes.BYTES_COMPARATOR;
3532
import static org.apache.hadoop.hbase.util.ConcurrentMapUtils.computeIfAbsent;
3633

3734
import java.io.IOException;
@@ -47,8 +44,6 @@
4744
import java.util.concurrent.CompletableFuture;
4845
import java.util.concurrent.ConcurrentHashMap;
4946
import java.util.concurrent.ConcurrentMap;
50-
import java.util.concurrent.ConcurrentNavigableMap;
51-
import java.util.concurrent.ConcurrentSkipListMap;
5247
import java.util.concurrent.TimeUnit;
5348
import org.apache.commons.lang3.ObjectUtils;
5449
import org.apache.hadoop.hbase.CatalogFamilyFormat;
@@ -66,8 +61,6 @@
6661
import org.slf4j.Logger;
6762
import org.slf4j.LoggerFactory;
6863

69-
import org.apache.hbase.thirdparty.com.google.common.base.Objects;
70-
7164
/**
7265
* The asynchronous locator for regions other than meta.
7366
*/
@@ -146,13 +139,15 @@ public void complete() {
146139

147140
private static final class TableCache {
148141

149-
private final ConcurrentNavigableMap<byte[], RegionLocations> cache =
150-
new ConcurrentSkipListMap<>(BYTES_COMPARATOR);
151-
152142
private final Set<LocateRequest> pendingRequests = new HashSet<>();
153143

154144
private final Map<LocateRequest, CompletableFuture<RegionLocations>> allRequests =
155145
new LinkedHashMap<>();
146+
private final AsyncRegionLocationCache regionLocationCache;
147+
148+
public TableCache(TableName tableName) {
149+
regionLocationCache = new AsyncRegionLocationCache(tableName);
150+
}
156151

157152
public boolean hasQuota(int max) {
158153
return pendingRequests.size() < max;
@@ -261,76 +256,7 @@ private boolean tryComplete(LocateRequest req, CompletableFuture<RegionLocations
261256
}
262257

263258
private TableCache getTableCache(TableName tableName) {
264-
return computeIfAbsent(cache, tableName, TableCache::new);
265-
}
266-
267-
private boolean isEqual(RegionLocations locs1, RegionLocations locs2) {
268-
HRegionLocation[] locArr1 = locs1.getRegionLocations();
269-
HRegionLocation[] locArr2 = locs2.getRegionLocations();
270-
if (locArr1.length != locArr2.length) {
271-
return false;
272-
}
273-
for (int i = 0; i < locArr1.length; i++) {
274-
// do not need to compare region info
275-
HRegionLocation loc1 = locArr1[i];
276-
HRegionLocation loc2 = locArr2[i];
277-
if (loc1 == null) {
278-
if (loc2 != null) {
279-
return false;
280-
}
281-
} else {
282-
if (loc2 == null) {
283-
return false;
284-
}
285-
if (loc1.getSeqNum() != loc2.getSeqNum()) {
286-
return false;
287-
}
288-
if (!Objects.equal(loc1.getServerName(), loc2.getServerName())) {
289-
return false;
290-
}
291-
}
292-
}
293-
return true;
294-
}
295-
296-
// if we successfully add the locations to cache, return the locations, otherwise return the one
297-
// which prevents us being added. The upper layer can use this value to complete pending requests.
298-
private RegionLocations addToCache(TableCache tableCache, RegionLocations locs) {
299-
LOG.trace("Try adding {} to cache", locs);
300-
byte[] startKey = locs.getRegionLocation().getRegion().getStartKey();
301-
for (;;) {
302-
RegionLocations oldLocs = tableCache.cache.putIfAbsent(startKey, locs);
303-
if (oldLocs == null) {
304-
return locs;
305-
}
306-
// check whether the regions are the same, this usually happens when table is split/merged, or
307-
// deleted and recreated again.
308-
RegionInfo region = locs.getRegionLocation().getRegion();
309-
RegionInfo oldRegion = oldLocs.getRegionLocation().getRegion();
310-
if (region.getEncodedName().equals(oldRegion.getEncodedName())) {
311-
RegionLocations mergedLocs = oldLocs.mergeLocations(locs);
312-
if (isEqual(mergedLocs, oldLocs)) {
313-
// the merged one is the same with the old one, give up
314-
LOG.trace("Will not add {} to cache because the old value {} "
315-
+ " is newer than us or has the same server name."
316-
+ " Maybe it is updated before we replace it", locs, oldLocs);
317-
return oldLocs;
318-
}
319-
if (tableCache.cache.replace(startKey, oldLocs, mergedLocs)) {
320-
return mergedLocs;
321-
}
322-
} else {
323-
// the region is different, here we trust the one we fetched. This maybe wrong but finally
324-
// the upper layer can detect this and trigger removal of the wrong locations
325-
if (LOG.isDebugEnabled()) {
326-
LOG.debug("The newnly fetch region {} is different from the old one {} for row '{}',"
327-
+ " try replaing the old one...", region, oldRegion, Bytes.toStringBinary(startKey));
328-
}
329-
if (tableCache.cache.replace(startKey, oldLocs, locs)) {
330-
return locs;
331-
}
332-
}
333-
}
259+
return computeIfAbsent(cache, tableName, () -> new TableCache(tableName));
334260
}
335261

336262
private void complete(TableName tableName, LocateRequest req, RegionLocations locs,
@@ -342,7 +268,7 @@ private void complete(TableName tableName, LocateRequest req, RegionLocations lo
342268
Optional<LocateRequest> toSend = Optional.empty();
343269
TableCache tableCache = getTableCache(tableName);
344270
if (locs != null) {
345-
RegionLocations addedLocs = addToCache(tableCache, locs);
271+
RegionLocations addedLocs = tableCache.regionLocationCache.add(locs);
346272
List<RegionLocationsFutureResult> futureResultList = new ArrayList<>();
347273
synchronized (tableCache) {
348274
tableCache.pendingRequests.remove(req);
@@ -420,62 +346,24 @@ private void recordCacheMiss() {
420346
conn.getConnectionMetrics().ifPresent(MetricsConnection::incrMetaCacheMiss);
421347
}
422348

423-
private RegionLocations locateRowInCache(TableCache tableCache, TableName tableName, byte[] row,
424-
int replicaId) {
425-
Map.Entry<byte[], RegionLocations> entry = tableCache.cache.floorEntry(row);
426-
if (entry == null) {
349+
private RegionLocations locateRowInCache(TableCache tableCache, byte[] row, int replicaId) {
350+
RegionLocations locs = tableCache.regionLocationCache.findForRow(row, replicaId);
351+
if (locs == null) {
427352
recordCacheMiss();
428-
return null;
429-
}
430-
RegionLocations locs = entry.getValue();
431-
HRegionLocation loc = locs.getRegionLocation(replicaId);
432-
if (loc == null) {
433-
recordCacheMiss();
434-
return null;
435-
}
436-
byte[] endKey = loc.getRegion().getEndKey();
437-
if (isEmptyStopRow(endKey) || Bytes.compareTo(row, endKey) < 0) {
438-
if (LOG.isTraceEnabled()) {
439-
LOG.trace("Found {} in cache for {}, row='{}', locateType={}, replicaId={}", loc, tableName,
440-
Bytes.toStringBinary(row), RegionLocateType.CURRENT, replicaId);
441-
}
442-
recordCacheHit();
443-
return locs;
444353
} else {
445-
recordCacheMiss();
446-
return null;
354+
recordCacheHit();
447355
}
356+
return locs;
448357
}
449358

450-
private RegionLocations locateRowBeforeInCache(TableCache tableCache, TableName tableName,
451-
byte[] row, int replicaId) {
452-
boolean isEmptyStopRow = isEmptyStopRow(row);
453-
Map.Entry<byte[], RegionLocations> entry =
454-
isEmptyStopRow ? tableCache.cache.lastEntry() : tableCache.cache.lowerEntry(row);
455-
if (entry == null) {
456-
recordCacheMiss();
457-
return null;
458-
}
459-
RegionLocations locs = entry.getValue();
460-
HRegionLocation loc = locs.getRegionLocation(replicaId);
461-
if (loc == null) {
359+
private RegionLocations locateRowBeforeInCache(TableCache tableCache, byte[] row, int replicaId) {
360+
RegionLocations locs = tableCache.regionLocationCache.findForBeforeRow(row, replicaId);
361+
if (locs == null) {
462362
recordCacheMiss();
463-
return null;
464-
}
465-
if (
466-
isEmptyStopRow(loc.getRegion().getEndKey())
467-
|| (!isEmptyStopRow && Bytes.compareTo(loc.getRegion().getEndKey(), row) >= 0)
468-
) {
469-
if (LOG.isTraceEnabled()) {
470-
LOG.trace("Found {} in cache for {}, row='{}', locateType={}, replicaId={}", loc, tableName,
471-
Bytes.toStringBinary(row), RegionLocateType.BEFORE, replicaId);
472-
}
473-
recordCacheHit();
474-
return locs;
475363
} else {
476-
recordCacheMiss();
477-
return null;
364+
recordCacheHit();
478365
}
366+
return locs;
479367
}
480368

481369
private void locateInMeta(TableName tableName, LocateRequest req) {
@@ -569,7 +457,7 @@ public void onNext(Result[] results, ScanController controller) {
569457
if (info == null || info.isOffline() || info.isSplitParent()) {
570458
continue;
571459
}
572-
RegionLocations addedLocs = addToCache(tableCache, locs);
460+
RegionLocations addedLocs = tableCache.regionLocationCache.add(locs);
573461
List<RegionLocationsFutureResult> futureResultList = new ArrayList<>();
574462
synchronized (tableCache) {
575463
futureResultList.addAll(tableCache.clearCompletedRequests(addedLocs));
@@ -581,11 +469,11 @@ public void onNext(Result[] results, ScanController controller) {
581469
});
582470
}
583471

584-
private RegionLocations locateInCache(TableCache tableCache, TableName tableName, byte[] row,
585-
int replicaId, RegionLocateType locateType) {
472+
private RegionLocations locateInCache(TableCache tableCache, byte[] row, int replicaId,
473+
RegionLocateType locateType) {
586474
return locateType.equals(RegionLocateType.BEFORE)
587-
? locateRowBeforeInCache(tableCache, tableName, row, replicaId)
588-
: locateRowInCache(tableCache, tableName, row, replicaId);
475+
? locateRowBeforeInCache(tableCache, row, replicaId)
476+
: locateRowInCache(tableCache, row, replicaId);
589477
}
590478

591479
// locateToPrevious is true means we will use the start key of a region to locate the region
@@ -597,7 +485,7 @@ private CompletableFuture<RegionLocations> getRegionLocationsInternal(TableName
597485
assert !locateType.equals(RegionLocateType.AFTER);
598486
TableCache tableCache = getTableCache(tableName);
599487
if (!reload) {
600-
RegionLocations locs = locateInCache(tableCache, tableName, row, replicaId, locateType);
488+
RegionLocations locs = locateInCache(tableCache, row, replicaId, locateType);
601489
if (isGood(locs, replicaId)) {
602490
return CompletableFuture.completedFuture(locs);
603491
}
@@ -608,7 +496,7 @@ private CompletableFuture<RegionLocations> getRegionLocationsInternal(TableName
608496
synchronized (tableCache) {
609497
// check again
610498
if (!reload) {
611-
RegionLocations locs = locateInCache(tableCache, tableName, row, replicaId, locateType);
499+
RegionLocations locs = locateInCache(tableCache, row, replicaId, locateType);
612500
if (isGood(locs, replicaId)) {
613501
return CompletableFuture.completedFuture(locs);
614502
}
@@ -647,51 +535,33 @@ private void recordClearRegionCache() {
647535

648536
private void removeLocationFromCache(HRegionLocation loc) {
649537
TableCache tableCache = cache.get(loc.getRegion().getTable());
650-
if (tableCache == null) {
651-
return;
652-
}
653-
byte[] startKey = loc.getRegion().getStartKey();
654-
for (;;) {
655-
RegionLocations oldLocs = tableCache.cache.get(startKey);
656-
if (oldLocs == null) {
657-
return;
658-
}
659-
HRegionLocation oldLoc = oldLocs.getRegionLocation(loc.getRegion().getReplicaId());
660-
if (!canUpdateOnError(loc, oldLoc)) {
661-
return;
662-
}
663-
// Tell metaReplicaSelector that the location is stale. It will create a stale entry
664-
// with timestamp internally. Next time the client looks up the same location,
665-
// it will pick a different meta replica region.
666-
if (this.metaReplicaMode == CatalogReplicaMode.LOAD_BALANCE) {
667-
metaReplicaSelector.onError(loc);
538+
if (tableCache != null) {
539+
if (tableCache.regionLocationCache.remove(loc)) {
540+
recordClearRegionCache();
541+
updateMetaReplicaSelector(loc);
668542
}
543+
}
544+
}
669545

670-
RegionLocations newLocs = removeRegionLocation(oldLocs, loc.getRegion().getReplicaId());
671-
if (newLocs == null) {
672-
if (tableCache.cache.remove(startKey, oldLocs)) {
673-
recordClearRegionCache();
674-
return;
675-
}
676-
} else {
677-
if (tableCache.cache.replace(startKey, oldLocs, newLocs)) {
678-
recordClearRegionCache();
679-
return;
680-
}
681-
}
546+
private void updateMetaReplicaSelector(HRegionLocation loc) {
547+
// Tell metaReplicaSelector that the location is stale. It will create a stale entry
548+
// with timestamp internally. Next time the client looks up the same location,
549+
// it will pick a different meta replica region.
550+
if (metaReplicaMode == CatalogReplicaMode.LOAD_BALANCE) {
551+
metaReplicaSelector.onError(loc);
682552
}
683553
}
684554

685555
void addLocationToCache(HRegionLocation loc) {
686-
addToCache(getTableCache(loc.getRegion().getTable()), createRegionLocations(loc));
556+
getTableCache(loc.getRegion().getTable()).regionLocationCache.add(createRegionLocations(loc));
687557
}
688558

689559
private HRegionLocation getCachedLocation(HRegionLocation loc) {
690560
TableCache tableCache = cache.get(loc.getRegion().getTable());
691561
if (tableCache == null) {
692562
return null;
693563
}
694-
RegionLocations locs = tableCache.cache.get(loc.getRegion().getStartKey());
564+
RegionLocations locs = tableCache.regionLocationCache.get(loc.getRegion().getStartKey());
695565
return locs != null ? locs.getRegionLocation(loc.getRegion().getReplicaId()) : null;
696566
}
697567

@@ -716,8 +586,8 @@ void clearCache(TableName tableName) {
716586
}
717587
}
718588
futureResultList.forEach(RegionLocationsFutureResult::complete);
719-
conn.getConnectionMetrics()
720-
.ifPresent(metrics -> metrics.incrMetaCacheNumClearRegion(tableCache.cache.size()));
589+
conn.getConnectionMetrics().ifPresent(
590+
metrics -> metrics.incrMetaCacheNumClearRegion(tableCache.regionLocationCache.size()));
721591
}
722592

723593
void clearCache() {
@@ -726,19 +596,7 @@ void clearCache() {
726596

727597
void clearCache(ServerName serverName) {
728598
for (TableCache tableCache : cache.values()) {
729-
for (Map.Entry<byte[], RegionLocations> entry : tableCache.cache.entrySet()) {
730-
byte[] regionName = entry.getKey();
731-
RegionLocations locs = entry.getValue();
732-
RegionLocations newLocs = locs.removeByServer(serverName);
733-
if (locs == newLocs) {
734-
continue;
735-
}
736-
if (newLocs.isEmpty()) {
737-
tableCache.cache.remove(regionName, locs);
738-
} else {
739-
tableCache.cache.replace(regionName, locs, newLocs);
740-
}
741-
}
599+
tableCache.regionLocationCache.removeForServer(serverName);
742600
}
743601
}
744602

@@ -748,7 +606,7 @@ RegionLocations getRegionLocationInCache(TableName tableName, byte[] row) {
748606
if (tableCache == null) {
749607
return null;
750608
}
751-
return locateRowInCache(tableCache, tableName, row, RegionReplicaUtil.DEFAULT_REPLICA_ID);
609+
return locateRowInCache(tableCache, row, RegionReplicaUtil.DEFAULT_REPLICA_ID);
752610
}
753611

754612
// only used for testing whether we have cached the location for a table.
@@ -757,6 +615,7 @@ int getNumberOfCachedRegionLocations(TableName tableName) {
757615
if (tableCache == null) {
758616
return 0;
759617
}
760-
return tableCache.cache.values().stream().mapToInt(RegionLocations::numNonNullElements).sum();
618+
return tableCache.regionLocationCache.getAll().stream()
619+
.mapToInt(RegionLocations::numNonNullElements).sum();
761620
}
762621
}

0 commit comments

Comments
 (0)