Skip to content

Commit dd694e4

Browse files
authored
HBASE-28656 Optimize the verifyCopyResult logic in ExportSnapshot (#5996)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
Signed-off-by: Wei-Chiu Chuang <weichiu@apache.org>
1 parent f8aa3e2 commit dd694e4

File tree

4 files changed

+92
-16
lines changed

4 files changed

+92
-16
lines changed

hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java

Lines changed: 66 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,15 @@ public enum Counter {
168168
BYTES_COPIED
169169
}
170170

171+
/**
172+
* Indicates the checksum comparison result.
173+
*/
174+
public enum ChecksumComparison {
175+
TRUE, // checksum comparison is compatible and true.
176+
FALSE, // checksum comparison is compatible and false.
177+
INCOMPATIBLE, // checksum comparison is not compatible.
178+
}
179+
171180
private static class ExportMapper
172181
extends Mapper<BytesWritable, NullWritable, NullWritable, NullWritable> {
173182
private static final Logger LOG = LoggerFactory.getLogger(ExportMapper.class);
@@ -533,6 +542,9 @@ private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
533542
}
534543
}
535544

545+
/**
546+
* Utility to compare the file length and checksums for the paths specified.
547+
*/
536548
private void verifyCopyResult(final FileStatus inputStat, final FileStatus outputStat)
537549
throws IOException {
538550
long inputLen = inputStat.getLen();
@@ -547,20 +559,64 @@ private void verifyCopyResult(final FileStatus inputStat, final FileStatus outpu
547559

548560
// If length==0, we will skip checksum
549561
if (inputLen != 0 && verifyChecksum) {
550-
FileChecksum inChecksum = getFileChecksum(inputFs, inputPath);
551-
if (inChecksum == null) {
552-
LOG.warn("Input file " + inputPath + " checksums are not available");
553-
}
554-
FileChecksum outChecksum = getFileChecksum(outputFs, outputPath);
555-
if (outChecksum == null) {
556-
LOG.warn("Output file " + outputPath + " checksums are not available");
557-
}
558-
if (inChecksum != null && outChecksum != null && !inChecksum.equals(outChecksum)) {
559-
throw new IOException("Checksum mismatch between " + inputPath + " and " + outputPath);
562+
FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
563+
FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
564+
565+
ChecksumComparison checksumComparison = verifyChecksum(inChecksum, outChecksum);
566+
if (!checksumComparison.equals(ChecksumComparison.TRUE)) {
567+
StringBuilder errMessage = new StringBuilder("Checksum mismatch between ")
568+
.append(inputPath).append(" and ").append(outputPath).append(".");
569+
570+
boolean addSkipHint = false;
571+
String inputScheme = inputFs.getScheme();
572+
String outputScheme = outputFs.getScheme();
573+
if (!inputScheme.equals(outputScheme)) {
574+
errMessage.append(" Input and output filesystems are of different types.\n")
575+
.append("Their checksum algorithms may be incompatible.");
576+
addSkipHint = true;
577+
} else if (inputStat.getBlockSize() != outputStat.getBlockSize()) {
578+
errMessage.append(" Input and output differ in block-size.");
579+
addSkipHint = true;
580+
} else if (
581+
inChecksum != null && outChecksum != null
582+
&& !inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
583+
) {
584+
errMessage.append(" Input and output checksum algorithms are of different types.");
585+
addSkipHint = true;
586+
}
587+
if (addSkipHint) {
588+
errMessage
589+
.append(" You can choose file-level checksum validation via "
590+
+ "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes"
591+
+ " or filesystems are different.")
592+
.append(" Or you can skip checksum-checks altogether with --no-checksum-verify.\n")
593+
.append(" (NOTE: By skipping checksums, one runs the risk of "
594+
+ "masking data-corruption during file-transfer.)\n");
595+
}
596+
throw new IOException(errMessage.toString());
560597
}
561598
}
562599
}
563600

601+
/**
602+
* Utility to compare checksums
603+
*/
604+
private ChecksumComparison verifyChecksum(final FileChecksum inChecksum,
605+
final FileChecksum outChecksum) {
606+
// If the input or output checksum is null, or the algorithms of input and output are not
607+
// equal, then no meaningful comparison is possible,
608+
// so return INCOMPATIBLE. Otherwise return TRUE if the checksums match, FALSE if they do not.
609+
if (
610+
inChecksum == null || outChecksum == null
611+
|| !inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
612+
) {
613+
return ChecksumComparison.INCOMPATIBLE;
614+
} else if (inChecksum.equals(outChecksum)) {
615+
return ChecksumComparison.TRUE;
616+
}
617+
return ChecksumComparison.FALSE;
618+
}
619+
564620
/**
565621
* Check if the two files are equal by looking at the file length, and at the checksum (if user
566622
* has specified the verifyChecksum flag).

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,23 @@ public void testConsecutiveExports() throws Exception {
227227
removeExportDir(copyDir);
228228
}
229229

230+
@Test
231+
public void testExportWithChecksum() throws Exception {
232+
// Test different schemes: input scheme is hdfs:// and output scheme is file://
233+
// The checksum verification will fail
234+
Path copyLocalDir = getLocalDestinationDir(TEST_UTIL);
235+
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, snapshotName,
236+
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyLocalDir, false, false,
237+
getBypassRegionPredicate(), false, true);
238+
239+
// Test same schemes: input scheme is hdfs:// and output scheme is hdfs://
240+
// The checksum verification will succeed
241+
Path copyHdfsDir = getHdfsDestinationDir();
242+
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, snapshotName,
243+
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyHdfsDir, false, false,
244+
getBypassRegionPredicate(), true, true);
245+
}
246+
230247
@Test
231248
public void testExportWithTargetName() throws Exception {
232249
final String targetName = "testExportWithTargetName";
@@ -281,7 +298,7 @@ protected void testExportFileSystemState(final TableName tableName, final String
281298
throws Exception {
282299
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
283300
filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir, overwrite, resetTtl,
284-
getBypassRegionPredicate(), true);
301+
getBypassRegionPredicate(), true, false);
285302
}
286303

287304
/**
@@ -290,8 +307,8 @@ protected void testExportFileSystemState(final TableName tableName, final String
290307
protected static void testExportFileSystemState(final Configuration conf,
291308
final TableName tableName, final String snapshotName, final String targetName,
292309
final int filesExpected, final Path srcDir, Path rawTgtDir, final boolean overwrite,
293-
final boolean resetTtl, final RegionPredicate bypassregionPredicate, boolean success)
294-
throws Exception {
310+
final boolean resetTtl, final RegionPredicate bypassregionPredicate, final boolean success,
311+
final boolean checksumVerify) throws Exception {
295312
FileSystem tgtFs = rawTgtDir.getFileSystem(conf);
296313
FileSystem srcFs = srcDir.getFileSystem(conf);
297314
Path tgtDir = rawTgtDir.makeQualified(tgtFs.getUri(), tgtFs.getWorkingDirectory());
@@ -312,6 +329,9 @@ protected static void testExportFileSystemState(final Configuration conf,
312329
if (resetTtl) {
313330
opts.add("--reset-ttl");
314331
}
332+
if (!checksumVerify) {
333+
opts.add("--no-checksum-verify");
334+
}
315335

316336
// Export Snapshot
317337
int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotAdjunct.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ public void testExportRetry() throws Exception {
151151
conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 2);
152152
conf.setInt("mapreduce.map.maxattempts", 3);
153153
TestExportSnapshot.testExportFileSystemState(conf, tableName, snapshotName, snapshotName,
154-
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, null, true);
154+
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, null, true, false);
155155
}
156156

157157
/**
@@ -167,6 +167,6 @@ public void testExportFailure() throws Exception {
167167
conf.setInt(ExportSnapshot.Testing.CONF_TEST_FAILURE_COUNT, 4);
168168
conf.setInt("mapreduce.map.maxattempts", 3);
169169
TestExportSnapshot.testExportFileSystemState(conf, tableName, snapshotName, snapshotName,
170-
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, null, false);
170+
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true, false, null, false, false);
171171
}
172172
}

hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshotV1NoCluster.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ static void testSnapshotWithRefsExportFileSystemState(FileSystem fs,
125125
TableName tableName = builder.getTableDescriptor().getTableName();
126126
TestExportSnapshot.testExportFileSystemState(testUtil.getConfiguration(), tableName,
127127
snapshotName, snapshotName, snapshotFilesCount, testDir,
128-
getDestinationDir(fs, testUtil, testDir), false, false, null, true);
128+
getDestinationDir(fs, testUtil, testDir), false, false, null, true, false);
129129
}
130130

131131
static Path getDestinationDir(FileSystem fs, HBaseCommonTestingUtil hctu, Path testDir)

0 commit comments

Comments
 (0)