Skip to content

Commit fca0f3f

Browse files
authored
HBASE-24316 GCMulitpleMergedRegionsProcedure is not idempotent (#1660) (#1671)
It addresses couple issues: 1. Make sure deleteMergeQualifiers() does not delete the row if there is no columns with "merge" keyword. 2. GCMulitpleMergedRegionsProcedure now acquire an exclusive lock on the child region. Signed-off-by: stack <stack@apache.org>
1 parent 04ac08d commit fca0f3f

File tree

3 files changed

+64
-3
lines changed

3 files changed

+64
-3
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1904,6 +1904,16 @@ public static void deleteMergeQualifiers(Connection connection, final RegionInfo
19041904
qualifiers.add(qualifier);
19051905
delete.addColumns(getCatalogFamily(), qualifier, HConstants.LATEST_TIMESTAMP);
19061906
}
1907+
1908+
// There will be race condition that a GCMultipleMergedRegionsProcedure is scheduled while
1909+
// the previous GCMultipleMergedRegionsProcedure is still going on, in this case, the second
1910+
// GCMultipleMergedRegionsProcedure could delete the merged region by accident!
1911+
if (qualifiers.isEmpty()) {
1912+
LOG.info("No merged qualifiers for region " + mergeRegion.getRegionNameAsString() +
1913+
" in meta table, they are cleaned up already, Skip.");
1914+
return;
1915+
}
1916+
19071917
deleteFromMetaTable(connection, delete);
19081918
LOG.info("Deleted merge references in " + mergeRegion.getRegionNameAsString() +
19091919
", deleted qualifiers " + qualifiers.stream().map(Bytes::toStringBinary).

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCMultipleMergedRegionsProcedure.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,26 @@ public GCMultipleMergedRegionsProcedure() {
6363
super();
6464
}
6565

66+
@Override protected boolean holdLock(MasterProcedureEnv env) {
67+
return true;
68+
}
69+
70+
@Override
71+
protected LockState acquireLock(final MasterProcedureEnv env) {
72+
// It now takes an exclusive lock on the merged child region to make sure
73+
// that no two parallel running of two GCMultipleMergedRegionsProcedures on the
74+
// region.
75+
if (env.getProcedureScheduler().waitRegion(this, mergedChild)) {
76+
return LockState.LOCK_EVENT_WAIT;
77+
}
78+
return LockState.LOCK_ACQUIRED;
79+
}
80+
81+
@Override
82+
protected void releaseLock(final MasterProcedureEnv env) {
83+
env.getProcedureScheduler().wakeRegion(this, mergedChild);
84+
}
85+
6686
@Override
6787
public TableOperationType getTableOperationType() {
6888
return TableOperationType.MERGED_REGIONS_GC;

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.io.IOException;
2424
import java.util.Collections;
2525
import java.util.List;
26+
import java.util.Map;
2627
import java.util.function.BooleanSupplier;
2728
import org.apache.hadoop.hbase.HBaseClassTestRule;
2829
import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -31,8 +32,12 @@
3132
import org.apache.hadoop.hbase.TableName;
3233
import org.apache.hadoop.hbase.client.RegionInfo;
3334
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
35+
import org.apache.hadoop.hbase.client.Result;
3436
import org.apache.hadoop.hbase.client.Table;
3537
import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
38+
import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
39+
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
40+
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
3641
import org.apache.hadoop.hbase.testclassification.LargeTests;
3742
import org.apache.hadoop.hbase.testclassification.MasterTests;
3843
import org.apache.hadoop.hbase.util.Threads;
@@ -168,18 +173,44 @@ public void testOverlap() throws Exception {
168173
assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
169174
MetaFixer fixer = new MetaFixer(services);
170175
fixer.fixOverlaps(report);
176+
177+
CatalogJanitor cj = services.getCatalogJanitor();
171178
await(10, () -> {
172179
try {
173-
services.getCatalogJanitor().scan();
174-
final CatalogJanitor.Report postReport = services.getCatalogJanitor().getLastReport();
175-
return postReport.isEmpty();
180+
if (cj.scan() > 0) {
181+
// It submits GC once, then it will immediately kick off another GC to test if
182+
// GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
183+
// a hole.
184+
Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
185+
for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
186+
List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
187+
if (parents != null) {
188+
ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
189+
pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(),
190+
e.getKey(), parents));
191+
}
192+
}
193+
return true;
194+
}
195+
return false;
176196
} catch (Exception e) {
177197
throw new RuntimeException(e);
178198
}
179199
});
180200

201+
// Wait until all GCs settled down
202+
await(10, () -> {
203+
return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
204+
});
205+
206+
// No orphan regions on FS
181207
hbckChore.chore();
182208
assertEquals(0, hbckChore.getOrphanRegionsOnFS().size());
209+
210+
// No holes reported.
211+
cj.scan();
212+
final CatalogJanitor.Report postReport = cj.getLastReport();
213+
assertTrue(postReport.isEmpty());
183214
}
184215

185216
/**

0 commit comments

Comments
 (0)