Skip to content

Commit 00d1aed

Browse files
committed
HBASE-23553 Snapshot referenced data files are deleted in some case
1 parent 82e155e commit 00d1aed

File tree

3 files changed

+153
-0
lines changed

3 files changed

+153
-0
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3857,4 +3857,9 @@ public void run() {
38573857
public AsyncClusterConnection getAsyncClusterConnection() {
38583858
return asyncClusterConnection;
38593859
}
3860+
3861+
@VisibleForTesting
3862+
public CompactedHFilesDischarger getCompactedHFilesDischarger() {
3863+
return compactedFileDischarger;
3864+
}
38603865
}

hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotReferenceUtil.java

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -350,6 +350,16 @@ public void storeFile(final RegionInfo regionInfo, final String family,
350350
String hfile = storeFile.getName();
351351
if (HFileLink.isHFileLink(hfile)) {
352352
names.add(HFileLink.getReferencedHFileName(hfile));
353+
} else if (StoreFileInfo.isReference(hfile)) {
354+
Path refPath = StoreFileInfo.getReferredToFile(new Path(new Path(
355+
new Path(new Path(regionInfo.getTable().getNamespaceAsString(),
356+
regionInfo.getTable().getQualifierAsString()), regionInfo.getEncodedName()),
357+
family), hfile));
358+
names.add(hfile);
359+
names.add(refPath.getName());
360+
if (HFileLink.isHFileLink(refPath.getName())) {
361+
names.add(HFileLink.getReferencedHFileName(refPath.getName()));
362+
}
353363
} else {
354364
names.add(hfile);
355365
}

hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java

Lines changed: 138 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,13 @@
1717
*/
1818
package org.apache.hadoop.hbase.client;
1919

20+
import java.io.FileNotFoundException;
2021
import java.io.IOException;
2122
import java.util.Arrays;
2223
import java.util.List;
24+
import java.util.stream.Collectors;
2325
import org.apache.hadoop.conf.Configuration;
26+
import org.apache.hadoop.fs.FileStatus;
2427
import org.apache.hadoop.fs.FileSystem;
2528
import org.apache.hadoop.fs.Path;
2629
import org.apache.hadoop.hbase.Cell;
@@ -29,13 +32,19 @@
2932
import org.apache.hadoop.hbase.HBaseTestingUtility;
3033
import org.apache.hadoop.hbase.StartMiniClusterOption;
3134
import org.apache.hadoop.hbase.TableName;
35+
import org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner;
3236
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
37+
import org.apache.hadoop.hbase.regionserver.HRegion;
38+
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
39+
import org.apache.hadoop.hbase.regionserver.HRegionServer;
3340
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
3441
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
3542
import org.apache.hadoop.hbase.testclassification.ClientTests;
3643
import org.apache.hadoop.hbase.testclassification.LargeTests;
3744
import org.apache.hadoop.hbase.util.Bytes;
3845
import org.apache.hadoop.hbase.util.FSUtils;
46+
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
47+
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
3948
import org.junit.After;
4049
import org.junit.Assert;
4150
import org.junit.ClassRule;
@@ -306,4 +315,133 @@ private static void verifyRow(Result result) throws IOException {
306315
}
307316
}
308317

318+
@Test
319+
public void testMergeRegion() throws Exception {
320+
setupCluster();
321+
TableName tableName = TableName.valueOf("testMergeRegion");
322+
String snapshotName = tableName.getNameAsString() + "_snapshot";
323+
Configuration conf = UTIL.getConfiguration();
324+
Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
325+
long timeout = 20000; // 20s
326+
try (Admin admin = UTIL.getAdmin()) {
327+
List<String> serverList = admin.getRegionServers().stream().map(sn -> sn.getServerName())
328+
.collect(Collectors.toList());
329+
// create table with 3 regions
330+
Table table = UTIL.createTable(tableName, FAMILIES, 1, bbb, yyy, 3);
331+
List<RegionInfo> regions = admin.getRegions(tableName);
332+
Assert.assertEquals(3, regions.size());
333+
RegionInfo region0 = regions.get(0);
334+
RegionInfo region1 = regions.get(1);
335+
RegionInfo region2 = regions.get(2);
336+
// put some data in the table
337+
UTIL.loadTable(table, FAMILIES);
338+
admin.flush(tableName);
339+
// wait flush is finished
340+
UTIL.waitFor(timeout, () -> {
341+
try {
342+
Path tableDir = FSUtils.getTableDir(rootDir, tableName);
343+
for (RegionInfo region : regions) {
344+
Path regionDir = new Path(tableDir, region.getEncodedName());
345+
for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
346+
if (fs.listStatus(familyDir).length != 1) {
347+
return false;
348+
}
349+
}
350+
}
351+
return true;
352+
} catch (IOException e) {
353+
LOG.warn("Failed check if flush is finished", e);
354+
return false;
355+
}
356+
});
357+
// merge 2 regions
358+
admin.compactionSwitch(false, serverList);
359+
admin.mergeRegionsAsync(region0.getEncodedNameAsBytes(), region1.getEncodedNameAsBytes(),
360+
true);
361+
UTIL.waitFor(timeout, () -> admin.getRegions(tableName).size() == 2);
362+
List<RegionInfo> mergedRegions = admin.getRegions(tableName);
363+
RegionInfo mergedRegion =
364+
mergedRegions.get(0).getEncodedName().equals(region2.getEncodedName())
365+
? mergedRegions.get(1)
366+
: mergedRegions.get(0);
367+
// snapshot
368+
admin.snapshot(snapshotName, tableName);
369+
Assert.assertEquals(1, admin.listSnapshots().size());
370+
// major compact
371+
admin.compactionSwitch(true, serverList);
372+
admin.majorCompactRegion(mergedRegion.getRegionName());
373+
// wait until merged region has no reference
374+
UTIL.waitFor(timeout, () -> {
375+
try {
376+
for (RegionServerThread regionServerThread : UTIL.getMiniHBaseCluster()
377+
.getRegionServerThreads()) {
378+
HRegionServer regionServer = regionServerThread.getRegionServer();
379+
for (HRegion subRegion : regionServer.getRegions(tableName)) {
380+
if (subRegion.getRegionInfo().getEncodedName()
381+
.equals(mergedRegion.getEncodedName())) {
382+
regionServer.getCompactedHFilesDischarger().chore();
383+
}
384+
}
385+
}
386+
Path tableDir = FSUtils.getTableDir(rootDir, tableName);
387+
HRegionFileSystem regionFs = HRegionFileSystem
388+
.openRegionFromFileSystem(UTIL.getConfiguration(), fs, tableDir, mergedRegion, true);
389+
return !regionFs.hasReferences(admin.getDescriptor(tableName));
390+
} catch (IOException e) {
391+
LOG.warn("Failed check merged region has no reference", e);
392+
return false;
393+
}
394+
});
395+
// run catalog janitor to clean and wait for parent regions are archived
396+
UTIL.getMiniHBaseCluster().getMaster().getCatalogJanitor().choreForTesting();
397+
UTIL.waitFor(timeout, () -> {
398+
try {
399+
Path tableDir = FSUtils.getTableDir(rootDir, tableName);
400+
for (FileStatus fileStatus : fs.listStatus(tableDir)) {
401+
String name = fileStatus.getPath().getName();
402+
if (name.equals(region0.getEncodedName()) || name.equals(region1.getEncodedName())) {
403+
return false;
404+
}
405+
}
406+
return true;
407+
} catch (IOException e) {
408+
LOG.warn("Check if parent regions are archived error", e);
409+
return false;
410+
}
411+
});
412+
// set file modify time and then run cleaner
413+
long time = System.currentTimeMillis() - TimeToLiveHFileCleaner.DEFAULT_TTL * 1000;
414+
traverseAndSetFileTime(HFileArchiveUtil.getArchivePath(conf), time);
415+
UTIL.getMiniHBaseCluster().getMaster().getHFileCleaner().runCleaner();
416+
// scan snapshot
417+
try (TableSnapshotScanner scanner = new TableSnapshotScanner(conf,
418+
UTIL.getDataTestDirOnTestFS(snapshotName), snapshotName, new Scan(bbb, yyy))) {
419+
verifyScanner(scanner, bbb, yyy);
420+
}
421+
} catch (Exception e) {
422+
LOG.error("scan snapshot error", e);
423+
Assert.fail("Should not throw FileNotFoundException");
424+
Assert.assertTrue(e.getCause() != null);
425+
Assert.assertTrue(e.getCause().getCause() instanceof FileNotFoundException);
426+
} finally {
427+
tearDownCluster();
428+
}
429+
}
430+
431+
private void traverseAndSetFileTime(Path path, long time) throws IOException {
432+
fs.setTimes(path, time, -1);
433+
if (fs.isDirectory(path)) {
434+
List<FileStatus> allPaths = Arrays.asList(fs.listStatus(path));
435+
List<FileStatus> subDirs =
436+
allPaths.stream().filter(FileStatus::isDirectory).collect(Collectors.toList());
437+
List<FileStatus> files =
438+
allPaths.stream().filter(FileStatus::isFile).collect(Collectors.toList());
439+
for (FileStatus subDir : subDirs) {
440+
traverseAndSetFileTime(subDir.getPath(), time);
441+
}
442+
for (FileStatus file : files) {
443+
fs.setTimes(file.getPath(), time, -1);
444+
}
445+
}
446+
}
309447
}

0 commit comments

Comments
 (0)