Skip to content

Commit 9502375

Browse files
authored
HBASE-24370 Avoid aggressive MergeRegion and GCMultipleMergedRegionsProcedure (#1719) (#1762)
Signed-off-by: Jan Hentschel <jan.hentschel@ultratendency.com>
1 parent ae4f1b4 commit 9502375

File tree

4 files changed

+134
-16
lines changed

4 files changed

+134
-16
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,11 @@ public boolean cleanMergeQualifier(final RegionInfo region) throws IOException {
424424
// It doesn't have merge qualifier, no need to clean
425425
return true;
426426
}
427-
return cleanMergeRegion(region, parents);
427+
428+
// If a parent region is a merged child region and GC has not kicked in/finish its work yet,
429+
// return false in this case to avoid kicking in a merge, trying later.
430+
cleanMergeRegion(region, parents);
431+
return false;
428432
}
429433

430434
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin1.java

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import static org.junit.Assert.assertEquals;
2121
import static org.junit.Assert.assertFalse;
22+
import static org.junit.Assert.assertNotNull;
2223
import static org.junit.Assert.assertTrue;
2324
import static org.junit.Assert.fail;
2425

@@ -36,6 +37,8 @@
3637
import org.apache.hadoop.hbase.TableName;
3738
import org.apache.hadoop.hbase.TableNotFoundException;
3839
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
40+
import org.apache.hadoop.hbase.master.CatalogJanitor;
41+
import org.apache.hadoop.hbase.master.HMaster;
3942
import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy;
4043
import org.apache.hadoop.hbase.regionserver.HRegion;
4144
import org.apache.hadoop.hbase.regionserver.HStore;
@@ -523,25 +526,42 @@ public void testMergeRegions() throws Exception {
523526
List<RegionInfo> tableRegions;
524527
RegionInfo regionA;
525528
RegionInfo regionB;
529+
RegionInfo regionC;
530+
RegionInfo mergedChildRegion = null;
526531

527532
// merge with full name
528533
tableRegions = ADMIN.getRegions(tableName);
529534
assertEquals(3, tableRegions.size());
530535
regionA = tableRegions.get(0);
531536
regionB = tableRegions.get(1);
537+
regionC = tableRegions.get(2);
532538
// TODO convert this to version that is synchronous (See HBASE-16668)
533-
ADMIN.mergeRegionsAsync(regionA.getRegionName(), regionB.getRegionName(), false).get(60,
534-
TimeUnit.SECONDS);
539+
ADMIN.mergeRegionsAsync(regionA.getRegionName(), regionB.getRegionName(),
540+
false).get(60, TimeUnit.SECONDS);
535541

536-
assertEquals(2, ADMIN.getRegions(tableName).size());
537-
538-
// merge with encoded name
539542
tableRegions = ADMIN.getRegions(tableName);
540-
regionA = tableRegions.get(0);
541-
regionB = tableRegions.get(1);
543+
544+
assertEquals(2, tableRegions.size());
545+
for (RegionInfo ri : tableRegions) {
546+
if (regionC.compareTo(ri) != 0) {
547+
mergedChildRegion = ri;
548+
break;
549+
}
550+
}
551+
552+
assertNotNull(mergedChildRegion);
553+
// Need to wait GC for merged child region is done.
554+
HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
555+
CatalogJanitor cj = services.getCatalogJanitor();
556+
cj.cleanMergeQualifier(mergedChildRegion);
557+
// Wait until all procedures settled down
558+
while (!services.getMasterProcedureExecutor().getActiveProcIds().isEmpty()) {
559+
Thread.sleep(200);
560+
}
561+
542562
// TODO convert this to version that is synchronous (See HBASE-16668)
543-
ADMIN
544-
.mergeRegionsAsync(regionA.getEncodedNameAsBytes(), regionB.getEncodedNameAsBytes(), false)
563+
ADMIN.mergeRegionsAsync(regionC.getEncodedNameAsBytes(),
564+
mergedChildRegion.getEncodedNameAsBytes(), false)
545565
.get(60, TimeUnit.SECONDS);
546566

547567
assertEquals(1, ADMIN.getRegions(tableName).size());

hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi2.java

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import static org.hamcrest.CoreMatchers.instanceOf;
2222
import static org.junit.Assert.assertEquals;
2323
import static org.junit.Assert.assertFalse;
24+
import static org.junit.Assert.assertNotNull;
2425
import static org.junit.Assert.assertThat;
2526
import static org.junit.Assert.assertTrue;
2627
import static org.junit.Assert.fail;
@@ -35,6 +36,8 @@
3536
import org.apache.hadoop.hbase.HConstants;
3637
import org.apache.hadoop.hbase.HRegionLocation;
3738
import org.apache.hadoop.hbase.TableName;
39+
import org.apache.hadoop.hbase.master.CatalogJanitor;
40+
import org.apache.hadoop.hbase.master.HMaster;
3841
import org.apache.hadoop.hbase.testclassification.ClientTests;
3942
import org.apache.hadoop.hbase.testclassification.LargeTests;
4043
import org.apache.hadoop.hbase.util.Bytes;
@@ -161,20 +164,39 @@ public void testMergeRegions() throws Exception {
161164
.getTableHRegionLocations(metaTable, tableName).get();
162165
RegionInfo regionA;
163166
RegionInfo regionB;
167+
RegionInfo regionC;
168+
RegionInfo mergedChildRegion = null;
164169

165170
// merge with full name
166171
assertEquals(3, regionLocations.size());
167172
regionA = regionLocations.get(0).getRegion();
168173
regionB = regionLocations.get(1).getRegion();
174+
regionC = regionLocations.get(2).getRegion();
169175
admin.mergeRegions(regionA.getRegionName(), regionB.getRegionName(), false).get();
170176

171177
regionLocations = AsyncMetaTableAccessor
172178
.getTableHRegionLocations(metaTable, tableName).get();
179+
173180
assertEquals(2, regionLocations.size());
181+
for (HRegionLocation rl : regionLocations) {
182+
if (regionC.compareTo(rl.getRegion()) != 0) {
183+
mergedChildRegion = rl.getRegion();
184+
break;
185+
}
186+
}
187+
188+
assertNotNull(mergedChildRegion);
189+
// Need to wait GC for merged child region is done.
190+
HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
191+
CatalogJanitor cj = services.getCatalogJanitor();
192+
cj.cleanMergeQualifier(mergedChildRegion);
193+
// Wait until all procedures settled down
194+
while (!services.getMasterProcedureExecutor().getActiveProcIds().isEmpty()) {
195+
Thread.sleep(200);
196+
}
174197
// merge with encoded name
175-
regionA = regionLocations.get(0).getRegion();
176-
regionB = regionLocations.get(1).getRegion();
177-
admin.mergeRegions(regionA.getRegionName(), regionB.getRegionName(), false).get();
198+
admin.mergeRegions(regionC.getRegionName(), mergedChildRegion.getRegionName(),
199+
false).get();
178200

179201
regionLocations = AsyncMetaTableAccessor
180202
.getTableHRegionLocations(metaTable, tableName).get();

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,15 @@
2626
import java.util.List;
2727
import java.util.Map;
2828
import java.util.function.BooleanSupplier;
29+
import org.apache.hadoop.hbase.Cell;
30+
import org.apache.hadoop.hbase.CellBuilderFactory;
31+
import org.apache.hadoop.hbase.CellBuilderType;
2932
import org.apache.hadoop.hbase.HBaseClassTestRule;
3033
import org.apache.hadoop.hbase.HBaseTestingUtility;
3134
import org.apache.hadoop.hbase.HConstants;
3235
import org.apache.hadoop.hbase.MetaTableAccessor;
3336
import org.apache.hadoop.hbase.TableName;
37+
import org.apache.hadoop.hbase.client.Put;
3438
import org.apache.hadoop.hbase.client.RegionInfo;
3539
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
3640
import org.apache.hadoop.hbase.client.Result;
@@ -43,6 +47,7 @@
4347
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
4448
import org.apache.hadoop.hbase.testclassification.LargeTests;
4549
import org.apache.hadoop.hbase.testclassification.MasterTests;
50+
import org.apache.hadoop.hbase.util.Bytes;
4651
import org.apache.hadoop.hbase.util.Pair;
4752
import org.apache.hadoop.hbase.util.Threads;
4853
import org.junit.AfterClass;
@@ -141,7 +146,7 @@ public void testOneRegionTable() throws IOException {
141146
assertEquals(0, ris.size());
142147
}
143148

144-
private static void makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
149+
private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
145150
throws IOException {
146151
RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()).
147152
setStartKey(a.getStartKey()).
@@ -152,6 +157,7 @@ private static void makeOverlap(MasterServices services, RegionInfo a, RegionInf
152157
System.currentTimeMillis())));
153158
// TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
154159
services.getAssignmentManager().assign(overlapRegion);
160+
return overlapRegion;
155161
}
156162

157163
private void testOverlapCommon(final TableName tn) throws Exception {
@@ -167,7 +173,6 @@ private void testOverlapCommon(final TableName tn) throws Exception {
167173
makeOverlap(services, ris.get(1), ris.get(3));
168174
makeOverlap(services, ris.get(2), ris.get(3));
169175
makeOverlap(services, ris.get(2), ris.get(4));
170-
Threads.sleep(10000);
171176
}
172177

173178
@Test
@@ -308,6 +313,74 @@ public void testOverlapWithSmallMergeCount() throws Exception {
308313
}
309314
}
310315

316+
/**
317+
* This test covers the case that one of merged parent regions is a merged child region that
318+
* has not been GCed but there is no reference files anymore. In this case, it will kick off
319+
* a GC procedure, but no merge will happen.
320+
*/
321+
@Test
322+
public void testMergeWithMergedChildRegion() throws Exception {
323+
TableName tn = TableName.valueOf(this.name.getMethodName());
324+
Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
325+
List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
326+
assertTrue(ris.size() > 5);
327+
HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
328+
CatalogJanitor cj = services.getCatalogJanitor();
329+
cj.scan();
330+
CatalogJanitor.Report report = cj.getLastReport();
331+
assertTrue(report.isEmpty());
332+
RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));
333+
334+
cj.scan();
335+
report = cj.getLastReport();
336+
assertEquals(2, report.getOverlaps().size());
337+
338+
// Mark it as a merged child region.
339+
RegionInfo fakedParentRegion = RegionInfoBuilder.newBuilder(tn).
340+
setStartKey(overlapRegion.getStartKey()).
341+
build();
342+
343+
Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection());
344+
Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
345+
HConstants.LATEST_TIMESTAMP);
346+
String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
347+
putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(
348+
putOfMerged.getRow()).
349+
setFamily(HConstants.CATALOG_FAMILY).
350+
setQualifier(Bytes.toBytes(qualifier)).
351+
setTimestamp(putOfMerged.getTimestamp()).
352+
setType(Cell.Type.Put).
353+
setValue(RegionInfo.toByteArray(fakedParentRegion)).
354+
build());
355+
356+
meta.put(putOfMerged);
357+
358+
MetaFixer fixer = new MetaFixer(services);
359+
fixer.fixOverlaps(report);
360+
361+
// Wait until all procedures settled down
362+
await(200, () -> {
363+
return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
364+
});
365+
366+
// No merge is done, overlap is still there.
367+
cj.scan();
368+
report = cj.getLastReport();
369+
assertEquals(2, report.getOverlaps().size());
370+
371+
fixer.fixOverlaps(report);
372+
373+
// Wait until all procedures settled down
374+
await(200, () -> {
375+
return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
376+
});
377+
378+
// Merge is done and no more overlaps
379+
cj.scan();
380+
report = cj.getLastReport();
381+
assertEquals(0, report.getOverlaps().size());
382+
}
383+
311384
/**
312385
* Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so
313386
* we can fix this condition. HBASE-24247
@@ -336,7 +409,6 @@ public void testOverlapWithMergeOfNonContiguous() throws Exception {
336409
while (!services.getMasterProcedureExecutor().isFinished(pid)) {
337410
Threads.sleep(100);
338411
}
339-
Threads.sleep(10000);
340412
services.getCatalogJanitor().scan();
341413
report = services.getCatalogJanitor().getLastReport();
342414
assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());

0 commit comments

Comments
 (0)