Skip to content

Commit 3c70d39

Browse files
committed
HBASE-28656 Optimize the verifyCopyResult logic in ExportSnapshot (#5996)
Signed-off-by: Duo Zhang <zhangduo@apache.org> Signed-off-by: Wei-Chiu Chuang <weichiu@apache.org>
1 parent 0cffa9d commit 3c70d39

File tree

4 files changed

+92
-16
lines changed

4 files changed

+92
-16
lines changed

hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java

Lines changed: 66 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,15 @@ public enum Counter {
168168
BYTES_COPIED
169169
}
170170

171+
/**
 * Result of comparing two file checksums.
 */
public enum ChecksumComparison {
  /** Checksums are comparable and match. */
  TRUE,
  /** Checksums are comparable and differ. */
  FALSE,
  /** Checksums cannot be compared (one is missing or the algorithms differ). */
  INCOMPATIBLE,
}
179+
171180
private static class ExportMapper
172181
extends Mapper<BytesWritable, NullWritable, NullWritable, NullWritable> {
173182
private static final Logger LOG = LoggerFactory.getLogger(ExportMapper.class);
@@ -539,6 +548,9 @@ private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
539548
}
540549
}
541550

551+
/**
552+
* Utility to compare the file length and checksums for the paths specified.
553+
*/
542554
private void verifyCopyResult(final FileStatus inputStat, final FileStatus outputStat)
543555
throws IOException {
544556
long inputLen = inputStat.getLen();
@@ -553,20 +565,64 @@ private void verifyCopyResult(final FileStatus inputStat, final FileStatus outpu
553565

554566
// If length==0, we will skip checksum
555567
if (inputLen != 0 && verifyChecksum) {
556-
FileChecksum inChecksum = getFileChecksum(inputFs, inputPath);
557-
if (inChecksum == null) {
558-
LOG.warn("Input file " + inputPath + " checksums are not available");
559-
}
560-
FileChecksum outChecksum = getFileChecksum(outputFs, outputPath);
561-
if (outChecksum == null) {
562-
LOG.warn("Output file " + outputPath + " checksums are not available");
563-
}
564-
if (inChecksum != null && outChecksum != null && !inChecksum.equals(outChecksum)) {
565-
throw new IOException("Checksum mismatch between " + inputPath + " and " + outputPath);
568+
FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
569+
FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
570+
571+
ChecksumComparison checksumComparison = verifyChecksum(inChecksum, outChecksum);
572+
if (!checksumComparison.equals(ChecksumComparison.TRUE)) {
573+
StringBuilder errMessage = new StringBuilder("Checksum mismatch between ")
574+
.append(inputPath).append(" and ").append(outputPath).append(".");
575+
576+
boolean addSkipHint = false;
577+
String inputScheme = inputFs.getScheme();
578+
String outputScheme = outputFs.getScheme();
579+
if (!inputScheme.equals(outputScheme)) {
580+
errMessage.append(" Input and output filesystems are of different types.\n")
581+
.append("Their checksum algorithms may be incompatible.");
582+
addSkipHint = true;
583+
} else if (inputStat.getBlockSize() != outputStat.getBlockSize()) {
584+
errMessage.append(" Input and output differ in block-size.");
585+
addSkipHint = true;
586+
} else if (
587+
inChecksum != null && outChecksum != null
588+
&& !inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
589+
) {
590+
errMessage.append(" Input and output checksum algorithms are of different types.");
591+
addSkipHint = true;
592+
}
593+
if (addSkipHint) {
594+
errMessage
595+
.append(" You can choose file-level checksum validation via "
596+
+ "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes"
597+
+ " or filesystems are different.")
598+
.append(" Or you can skip checksum-checks altogether with --no-checksum-verify.\n")
599+
.append(" (NOTE: By skipping checksums, one runs the risk of "
600+
+ "masking data-corruption during file-transfer.)\n");
601+
}
602+
throw new IOException(errMessage.toString());
566603
}
567604
}
568605
}
569606

607+
/**
608+
* Utility to compare checksums
609+
*/
610+
private ChecksumComparison verifyChecksum(final FileChecksum inChecksum,
611+
final FileChecksum outChecksum) {
612+
// If the input or output checksum is null, or the algorithms of input and output are not
613+
// equal, that means there is no comparison
614+
// and return not compatible. else if matched, return compatible with the matched result.
615+
if (
616+
inChecksum == null || outChecksum == null
617+
|| !inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
618+
) {
619+
return ChecksumComparison.INCOMPATIBLE;
620+
} else if (inChecksum.equals(outChecksum)) {
621+
return ChecksumComparison.TRUE;
622+
}
623+
return ChecksumComparison.FALSE;
624+
}
625+
570626
/**
571627
* Check if the two files are equal by looking at the file length, and at the checksum (if user
572628
* has specified the verifyChecksum flag).

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,23 @@ public void testConsecutiveExports() throws Exception {
227227
removeExportDir(copyDir);
228228
}
229229

230+
@Test
231+
public void testExportWithChecksum() throws Exception {
232+
// Test different schemes: input scheme is hdfs:// and output scheme is file://
233+
// The checksum verification will fail
234+
Path copyLocalDir = getLocalDestinationDir(TEST_UTIL);
235+
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, snapshotName,
236+
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyLocalDir, false, false,
237+
getBypassRegionPredicate(), false, true);
238+
239+
// Test same schemes: input scheme is hdfs:// and output scheme is hdfs://
240+
// The checksum verification will success
241+
Path copyHdfsDir = getHdfsDestinationDir();
242+
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, snapshotName,
243+
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyHdfsDir, false, false,
244+
getBypassRegionPredicate(), true, true);
245+
}
246+
230247
@Test
231248
public void testExportWithTargetName() throws Exception {
232249
final byte[] targetName = Bytes.toBytes("testExportWithTargetName");
@@ -282,7 +299,7 @@ protected void testExportFileSystemState(final TableName tableName, final byte[]
282299
throws Exception {
283300
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
284301
filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir, overwrite, resetTtl,
285-
getBypassRegionPredicate(), true);
302+
getBypassRegionPredicate(), true, false);
286303
}
287304

288305
/**
@@ -291,8 +308,8 @@ protected void testExportFileSystemState(final TableName tableName, final byte[]
291308
protected static void testExportFileSystemState(final Configuration conf,
292309
final TableName tableName, final byte[] snapshotName, final byte[] targetName,
293310
final int filesExpected, final Path srcDir, Path rawTgtDir, final boolean overwrite,
294-
final boolean resetTtl, final RegionPredicate bypassregionPredicate, boolean success)
295-
throws Exception {
311+
final boolean resetTtl, final RegionPredicate bypassregionPredicate, final boolean success,
312+
final boolean checksumVerify) throws Exception {
296313
FileSystem tgtFs = rawTgtDir.getFileSystem(conf);
297314
FileSystem srcFs = srcDir.getFileSystem(conf);
298315
Path tgtDir = rawTgtDir.makeQualified(tgtFs.getUri(), tgtFs.getWorkingDirectory());
@@ -313,6 +330,9 @@ protected static void testExportFileSystemState(final Configuration conf,
313330
if (resetTtl) {
314331
opts.add("--reset-ttl");
315332
}
333+
if (!checksumVerify) {
334+
opts.add("--no-checksum-verify");
335+
}
316336

317337
// Export Snapshot
318338
int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotAdjunct.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ public void testExportRetry() throws Exception {
153153
conf.setInt("mapreduce.map.maxattempts", 3);
154154
TestExportSnapshot.testExportFileSystemState(conf, tableName, Bytes.toBytes(snapshotName),
155155
Bytes.toBytes(snapshotName), tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true,
156-
false, null, true);
156+
false, null, true, false);
157157
}
158158

159159
/**
@@ -170,6 +170,6 @@ public void testExportFailure() throws Exception {
170170
conf.setInt("mapreduce.map.maxattempts", 3);
171171
TestExportSnapshot.testExportFileSystemState(conf, tableName, Bytes.toBytes(snapshotName),
172172
Bytes.toBytes(snapshotName), tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true,
173-
false, null, false);
173+
false, null, false, false);
174174
}
175175
}

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ static void testSnapshotWithRefsExportFileSystemState(FileSystem fs,
126126
TableName tableName = builder.getTableDescriptor().getTableName();
127127
TestExportSnapshot.testExportFileSystemState(testUtil.getConfiguration(), tableName,
128128
snapshotName, snapshotName, snapshotFilesCount, testDir,
129-
getDestinationDir(fs, testUtil, testDir), false, false, null, true);
129+
getDestinationDir(fs, testUtil, testDir), false, false, null, true, false);
130130
}
131131

132132
static Path getDestinationDir(FileSystem fs, HBaseCommonTestingUtility hctu, Path testDir)

0 commit comments

Comments
 (0)