Skip to content

Commit 71efe29

Browse files
ankitsolanmolnar
authored and committed
[HBASE-29520] Utilize Backed-up Bulkloaded Files in Incremental Backup (#7246)
Signed-off-by: Tak Lon (Stephen) Wu <taklwu@apache.org>
1 parent d9e5a44 commit 71efe29

File tree

11 files changed

+95
-64
lines changed

11 files changed

+95
-64
lines changed

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/AbstractPitrRestoreHandler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
2222
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_CONTINUOUS_BACKUP_PITR_WINDOW_DAYS;
2323
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.WALS_DIR;
24-
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
2524
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
25+
import static org.apache.hadoop.hbase.backup.util.BackupUtils.DATE_FORMAT;
2626
import static org.apache.hadoop.hbase.mapreduce.WALPlayer.IGNORE_EMPTY_FILES;
2727

2828
import java.io.IOException;

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@
4949
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_WORKERS_DESC;
5050
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_YARN_QUEUE_NAME;
5151
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.OPTION_YARN_QUEUE_NAME_DESC;
52-
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
5352
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
53+
import static org.apache.hadoop.hbase.backup.util.BackupUtils.DATE_FORMAT;
5454

5555
import java.io.IOException;
5656
import java.net.URI;

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@
1919

2020
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONF_CONTINUOUS_BACKUP_WAL_DIR;
2121
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.JOB_NAME_CONF_KEY;
22+
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.BULKLOAD_FILES_DIR;
2223
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.WALS_DIR;
23-
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
2424
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
25+
import static org.apache.hadoop.hbase.backup.util.BackupUtils.DATE_FORMAT;
2526

2627
import java.io.IOException;
2728
import java.net.URI;
@@ -169,6 +170,26 @@ protected List<BulkLoad> handleBulkLoad(List<TableName> tablesToBackup) throws I
169170
Path tblDir = CommonFSUtils.getTableDir(rootdir, srcTable);
170171
Path p = new Path(tblDir, regionName + Path.SEPARATOR + fam + Path.SEPARATOR + filename);
171172

173+
// For continuous backup: bulkload files are copied from backup directory defined by
174+
// CONF_CONTINUOUS_BACKUP_WAL_DIR instead of source cluster.
175+
String backupRootDir = conf.get(CONF_CONTINUOUS_BACKUP_WAL_DIR);
176+
if (backupInfo.isContinuousBackupEnabled() && !Strings.isNullOrEmpty(backupRootDir)) {
177+
String dayDirectoryName = BackupUtils.formatToDateString(bulkLoad.getTimestamp());
178+
Path bulkLoadBackupPath =
179+
new Path(backupRootDir, BULKLOAD_FILES_DIR + Path.SEPARATOR + dayDirectoryName);
180+
Path bulkLoadDir = new Path(bulkLoadBackupPath,
181+
srcTable.getNamespaceAsString() + Path.SEPARATOR + srcTable.getNameAsString());
182+
FileSystem backupFs = FileSystem.get(bulkLoadDir.toUri(), conf);
183+
Path fullBulkLoadBackupPath =
184+
new Path(bulkLoadDir, regionName + Path.SEPARATOR + fam + Path.SEPARATOR + filename);
185+
if (backupFs.exists(fullBulkLoadBackupPath)) {
186+
LOG.debug("Backup bulkload file found {}", fullBulkLoadBackupPath);
187+
p = fullBulkLoadBackupPath;
188+
} else {
189+
LOG.warn("Backup bulkload file not found {}", fullBulkLoadBackupPath);
190+
}
191+
}
192+
172193
String srcTableQualifier = srcTable.getQualifierAsString();
173194
String srcTableNs = srcTable.getNamespaceAsString();
174195
Path tgtFam = new Path(tgtRoot, srcTableNs + Path.SEPARATOR + srcTableQualifier

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/replication/ContinuousBackupReplicationEndpoint.java

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,8 @@
2121
import java.io.FileNotFoundException;
2222
import java.io.IOException;
2323
import java.io.UncheckedIOException;
24-
import java.text.SimpleDateFormat;
25-
import java.util.Date;
2624
import java.util.List;
2725
import java.util.Map;
28-
import java.util.TimeZone;
2926
import java.util.UUID;
3027
import java.util.concurrent.ConcurrentHashMap;
3128
import java.util.concurrent.Executors;
@@ -41,6 +38,7 @@
4138
import org.apache.hadoop.hbase.HBaseConfiguration;
4239
import org.apache.hadoop.hbase.HConstants;
4340
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
41+
import org.apache.hadoop.hbase.backup.util.BackupUtils;
4442
import org.apache.hadoop.hbase.client.Connection;
4543
import org.apache.hadoop.hbase.client.ConnectionFactory;
4644
import org.apache.hadoop.hbase.io.asyncfs.monitor.StreamSlowMonitor;
@@ -94,7 +92,6 @@ public class ContinuousBackupReplicationEndpoint extends BaseReplicationEndpoint
9492

9593
public static final long ONE_DAY_IN_MILLISECONDS = TimeUnit.DAYS.toMillis(1);
9694
public static final String WAL_FILE_PREFIX = "wal_file.";
97-
public static final String DATE_FORMAT = "yyyy-MM-dd";
9895

9996
@Override
10097
public void init(Context context) throws IOException {
@@ -330,7 +327,7 @@ private void backupWalEntries(long day, List<WAL.Entry> walEntries) throws IOExc
330327
}
331328

332329
private FSHLogProvider.Writer createWalWriter(long dayInMillis) {
333-
String dayDirectoryName = formatToDateString(dayInMillis);
330+
String dayDirectoryName = BackupUtils.formatToDateString(dayInMillis);
334331

335332
FileSystem fs = backupFileSystemManager.getBackupFs();
336333
Path walsDir = backupFileSystemManager.getWalsDir();
@@ -408,7 +405,7 @@ void uploadBulkLoadFiles(long dayInMillis, List<Path> bulkLoadFiles)
408405
LOG.trace("{} Bulk load files to upload: {}", Utils.logPeerId(peerId),
409406
bulkLoadFiles.stream().map(Path::toString).collect(Collectors.joining(", ")));
410407
}
411-
String dayDirectoryName = formatToDateString(dayInMillis);
408+
String dayDirectoryName = BackupUtils.formatToDateString(dayInMillis);
412409
Path bulkloadDir = new Path(backupFileSystemManager.getBulkLoadFilesDir(), dayDirectoryName);
413410
try {
414411
backupFileSystemManager.getBackupFs().mkdirs(bulkloadDir);
@@ -446,7 +443,7 @@ void uploadBulkLoadFiles(long dayInMillis, List<Path> bulkLoadFiles)
446443
} catch (IOException e) {
447444
throw new BulkLoadUploadException(
448445
String.format("%s Failed to copy bulk load file %s to %s on day %s",
449-
Utils.logPeerId(peerId), file, destPath, formatToDateString(dayInMillis)),
446+
Utils.logPeerId(peerId), file, destPath, BackupUtils.formatToDateString(dayInMillis)),
450447
e);
451448
}
452449
}
@@ -495,19 +492,6 @@ static void copyWithCleanup(FileSystem srcFS, Path src, FileSystem dstFS, Path d
495492
}
496493
}
497494

498-
/**
499-
* Convert dayInMillis to "yyyy-MM-dd" format
500-
*/
501-
@RestrictedApi(
502-
explanation = "Package-private for test visibility only. Do not use outside tests.",
503-
link = "",
504-
allowedOnPath = "(.*/src/test/.*|.*/org/apache/hadoop/hbase/backup/replication/ContinuousBackupReplicationEndpoint.java)")
505-
String formatToDateString(long dayInMillis) {
506-
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
507-
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
508-
return dateFormat.format(new Date(dayInMillis));
509-
}
510-
511495
private Path getBulkLoadFileStagingPath(Path relativePathFromNamespace) throws IOException {
512496
FileSystem rootFs = CommonFSUtils.getRootDirFileSystem(conf);
513497
Path rootDir = CommonFSUtils.getRootDir(conf);

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,17 @@
2424
import java.io.FileNotFoundException;
2525
import java.io.IOException;
2626
import java.net.URLDecoder;
27+
import java.text.SimpleDateFormat;
2728
import java.util.ArrayList;
2829
import java.util.Collections;
2930
import java.util.Comparator;
31+
import java.util.Date;
3032
import java.util.HashMap;
3133
import java.util.Iterator;
3234
import java.util.List;
3335
import java.util.Map;
3436
import java.util.Map.Entry;
37+
import java.util.TimeZone;
3538
import java.util.TreeMap;
3639
import java.util.TreeSet;
3740
import org.apache.hadoop.conf.Configuration;
@@ -86,6 +89,7 @@ public final class BackupUtils {
8689
private static final Logger LOG = LoggerFactory.getLogger(BackupUtils.class);
8790
public static final String LOGNAME_SEPARATOR = ".";
8891
public static final int MILLISEC_IN_HOUR = 3600000;
92+
public static final String DATE_FORMAT = "yyyy-MM-dd";
8993

9094
private BackupUtils() {
9195
throw new AssertionError("Instantiating utility class...");
@@ -932,4 +936,13 @@ private static boolean continuousBackupReplicationPeerExists(Admin admin) throws
932936
return admin.listReplicationPeers().stream()
933937
.anyMatch(peer -> peer.getPeerId().equals(CONTINUOUS_BACKUP_REPLICATION_PEER));
934938
}
939+
940+
/**
941+
* Convert dayInMillis to "yyyy-MM-dd" format
942+
*/
943+
public static String formatToDateString(long dayInMillis) {
944+
SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);
945+
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
946+
return dateFormat.format(new Date(dayInMillis));
947+
}
935948
}

hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,13 @@
1717
*/
1818
package org.apache.hadoop.hbase.backup;
1919

20+
import static org.apache.hadoop.hbase.HConstants.REPLICATION_BULKLOAD_ENABLE_KEY;
21+
import static org.apache.hadoop.hbase.HConstants.REPLICATION_CLUSTER_ID;
2022
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
2123
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.CONF_BACKUP_MAX_WAL_SIZE;
2224
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.CONF_STAGED_WAL_FLUSH_INITIAL_DELAY;
2325
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.CONF_STAGED_WAL_FLUSH_INTERVAL;
26+
import static org.apache.hadoop.hbase.mapreduce.WALPlayer.IGNORE_EMPTY_FILES;
2427

2528
import java.io.IOException;
2629
import java.util.ArrayList;
@@ -46,6 +49,7 @@
4649
import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
4750
import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
4851
import org.apache.hadoop.hbase.backup.impl.BackupManager;
52+
import org.apache.hadoop.hbase.backup.impl.BackupManifest;
4953
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
5054
import org.apache.hadoop.hbase.backup.impl.FullTableBackupClient;
5155
import org.apache.hadoop.hbase.backup.impl.IncrementalBackupManager;
@@ -304,6 +308,9 @@ public static void setUpHelper() throws Exception {
304308
conf1.set(CONF_BACKUP_MAX_WAL_SIZE, "10240");
305309
conf1.set(CONF_STAGED_WAL_FLUSH_INITIAL_DELAY, "10");
306310
conf1.set(CONF_STAGED_WAL_FLUSH_INTERVAL, "10");
311+
conf1.setBoolean(REPLICATION_BULKLOAD_ENABLE_KEY, true);
312+
conf1.set(REPLICATION_CLUSTER_ID, "clusterId1");
313+
conf1.setBoolean(IGNORE_EMPTY_FILES, true);
307314

308315
if (secure) {
309316
// set the always on security provider
@@ -571,6 +578,12 @@ protected void dumpBackupDir() throws IOException {
571578
}
572579
}
573580

581+
BackupManifest getLatestBackupManifest(List<BackupInfo> backups) throws IOException {
582+
BackupInfo newestBackup = backups.get(0);
583+
return HBackupFileSystem.getManifest(conf1, new Path(BACKUP_ROOT_DIR),
584+
newestBackup.getBackupId());
585+
}
586+
574587
void deleteContinuousBackupReplicationPeerIfExists(Admin admin) throws IOException {
575588
if (
576589
admin.listReplicationPeers().stream()

hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestBackupDeleteWithCleanup.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.CONTINUOUS_BACKUP_REPLICATION_PEER;
2222
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.BULKLOAD_FILES_DIR;
2323
import static org.apache.hadoop.hbase.backup.replication.BackupFileSystemManager.WALS_DIR;
24-
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.DATE_FORMAT;
2524
import static org.apache.hadoop.hbase.backup.replication.ContinuousBackupReplicationEndpoint.ONE_DAY_IN_MILLISECONDS;
25+
import static org.apache.hadoop.hbase.backup.util.BackupUtils.DATE_FORMAT;
2626
import static org.junit.Assert.assertEquals;
2727
import static org.junit.Assert.assertFalse;
2828
import static org.junit.Assert.assertTrue;

hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestContinuousBackup.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -271,12 +271,6 @@ String[] buildBackupArgs(String backupType, TableName[] tables, boolean continuo
271271
}
272272
}
273273

274-
BackupManifest getLatestBackupManifest(List<BackupInfo> backups) throws IOException {
275-
BackupInfo newestBackup = backups.get(0);
276-
return HBackupFileSystem.getManifest(conf1, new Path(BACKUP_ROOT_DIR),
277-
newestBackup.getBackupId());
278-
}
279-
280274
private void verifyTableInBackupSystemTable(TableName table) throws IOException {
281275
try (BackupSystemTable backupTable = new BackupSystemTable(TEST_UTIL.getConnection())) {
282276
Map<TableName, Long> tableBackupMap = backupTable.getContinuousBackupTableSet();

0 commit comments

Comments
 (0)