
Commit 5cf0d44

Author: Hernan Gelaf-Romer (committed)
Re-use splits from full backup
1 parent 6f8db78 commit 5cf0d44

File tree: 12 files changed (+673, −64 lines)


hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreFactory.java

Lines changed: 7 additions & 1 deletion
@@ -21,6 +21,7 @@
 import org.apache.hadoop.hbase.backup.mapreduce.MapReduceBackupCopyJob;
 import org.apache.hadoop.hbase.backup.mapreduce.MapReduceBackupMergeJob;
 import org.apache.hadoop.hbase.backup.mapreduce.MapReduceRestoreJob;
+import org.apache.hadoop.hbase.backup.mapreduce.MapReduceRestoreToOriginalSplitsJob;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 
@@ -43,8 +44,13 @@ private BackupRestoreFactory() {
    * @return backup restore job instance
    */
   public static RestoreJob getRestoreJob(Configuration conf) {
+    Class<? extends RestoreJob> defaultCls =
+      conf.getBoolean(RestoreJob.KEEP_ORIGINAL_SPLITS_KEY, RestoreJob.KEEP_ORIGINAL_SPLITS_DEFAULT)
+        ? MapReduceRestoreToOriginalSplitsJob.class
+        : MapReduceRestoreJob.class;
+
     Class<? extends RestoreJob> cls =
-      conf.getClass(HBASE_INCR_RESTORE_IMPL_CLASS, MapReduceRestoreJob.class, RestoreJob.class);
+      conf.getClass(HBASE_INCR_RESTORE_IMPL_CLASS, defaultCls, RestoreJob.class);
     RestoreJob service = ReflectionUtils.newInstance(cls, conf);
     service.setConf(conf);
     return service;
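
For illustration only (this snippet is not part of the commit), the new default can be exercised by setting the flag before asking the factory for a restore job. HBaseConfiguration is standard HBase API; the constants and classes referenced are the ones added or touched in this diff.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
import org.apache.hadoop.hbase.backup.RestoreJob;

public class KeepOriginalSplitsSketch {
  public static void main(String[] args) {
    // With the flag set and no explicit HBASE_INCR_RESTORE_IMPL_CLASS override,
    // getRestoreJob() now defaults to MapReduceRestoreToOriginalSplitsJob
    // instead of MapReduceRestoreJob.
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean(RestoreJob.KEEP_ORIGINAL_SPLITS_KEY, true);
    RestoreJob restoreJob = BackupRestoreFactory.getRestoreJob(conf);
    System.out.println(restoreJob.getClass().getName());
  }
}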

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/RestoreJob.java

Lines changed: 4 additions & 0 deletions
@@ -30,6 +30,10 @@
 
 @InterfaceAudience.Private
 public interface RestoreJob extends Configurable {
+
+  String KEEP_ORIGINAL_SPLITS_KEY = "hbase.backup.restorejob.keep.original.splits";
+  boolean KEEP_ORIGINAL_SPLITS_DEFAULT = false;
+
   /**
    * Run restore operation
    * @param dirPaths path array of WAL log directories

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/RestoreRequest.java

Lines changed: 16 additions & 0 deletions
@@ -67,6 +67,11 @@ public Builder withOvewrite(boolean overwrite) {
       return this;
     }
 
+    public Builder withKeepOriginalSplits(boolean keepOriginalSplits) {
+      request.setKeepOriginalSplits(keepOriginalSplits);
+      return this;
+    }
+
     public RestoreRequest build() {
       return request;
     }
@@ -80,6 +85,8 @@ public RestoreRequest build() {
   private TableName[] toTables;
   private boolean overwrite = false;
 
+  private boolean keepOriginalSplits = false;
+
   private RestoreRequest() {
   }
 
@@ -145,4 +152,13 @@ private RestoreRequest setOverwrite(boolean overwrite) {
     this.overwrite = overwrite;
     return this;
   }
+
+  public boolean isKeepOriginalSplits() {
+    return keepOriginalSplits;
+  }
+
+  private RestoreRequest setKeepOriginalSplits(boolean keepOriginalSplits) {
+    this.keepOriginalSplits = keepOriginalSplits;
+    return this;
+  }
 }
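
As a usage sketch (not part of this diff), a restore request that re-uses the original splits could be assembled through the extended builder. The other with* methods and the way the Builder is instantiated are assumed from the existing RestoreRequest API; the parameters are placeholders supplied by the caller.

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.RestoreRequest;

public class RestoreRequestSketch {
  static RestoreRequest keepSplitsRequest(String backupRootDir, String backupId,
    TableName[] fromTables, TableName[] toTables) {
    return new RestoreRequest.Builder()
      .withBackupRootDir(backupRootDir)
      .withBackupId(backupId)
      .withFromTables(fromTables)
      .withToTables(toTables)
      .withOvewrite(false)          // pre-existing overwrite setter (the typo'd name is the real one)
      .withKeepOriginalSplits(true) // new in this commit
      .build();
  }
}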

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java

Lines changed: 101 additions & 42 deletions
@@ -30,7 +30,9 @@
 import org.apache.commons.io.FilenameUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.BackupCopyJob;
 import org.apache.hadoop.hbase.backup.BackupInfo;
@@ -40,13 +42,16 @@
 import org.apache.hadoop.hbase.backup.BackupType;
 import org.apache.hadoop.hbase.backup.HBackupFileSystem;
 import org.apache.hadoop.hbase.backup.mapreduce.MapReduceBackupCopyJob;
+import org.apache.hadoop.hbase.backup.mapreduce.MapReduceHFileSplitterJob;
 import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
 import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.mapreduce.WALPlayer;
 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
 import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
+import org.apache.hadoop.hbase.snapshot.SnapshotRegionLocator;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
@@ -165,54 +170,60 @@ protected List<BulkLoad> handleBulkLoad(List<TableName> tablesToBackup) throws I
         LOG.debug("copying archive {} to {}", archive, tgt);
         archiveFiles.add(archive.toString());
       }
+      mergeSplitBulkloads(activeFiles, archiveFiles, srcTable);
+      incrementalCopyBulkloadHFiles(tgtFs, srcTable);
     }
-
-    copyBulkLoadedFiles(activeFiles, archiveFiles);
     return bulkLoads;
   }
 
-  private void copyBulkLoadedFiles(List<String> activeFiles, List<String> archiveFiles)
-    throws IOException {
-    try {
-      // Enable special mode of BackupDistCp
-      conf.setInt(MapReduceBackupCopyJob.NUMBER_OF_LEVELS_TO_PRESERVE_KEY, 5);
-      // Copy active files
-      String tgtDest = backupInfo.getBackupRootDir() + Path.SEPARATOR + backupInfo.getBackupId();
-      int attempt = 1;
-      while (activeFiles.size() > 0) {
-        LOG.info("Copy " + activeFiles.size() + " active bulk loaded files. Attempt =" + attempt++);
-        String[] toCopy = new String[activeFiles.size()];
-        activeFiles.toArray(toCopy);
-        // Active file can be archived during copy operation,
-        // we need to handle this properly
-        try {
-          incrementalCopyHFiles(toCopy, tgtDest);
-          break;
-        } catch (IOException e) {
-          // Check if some files got archived
-          // Update active and archived lists
-          // When file is being moved from active to archive
-          // directory, the number of active files decreases
-          int numOfActive = activeFiles.size();
-          updateFileLists(activeFiles, archiveFiles);
-          if (activeFiles.size() < numOfActive) {
-            continue;
-          }
-          // if not - throw exception
-          throw e;
+  private void mergeSplitBulkloads(List<String> activeFiles, List<String> archiveFiles,
+    TableName tn) throws IOException {
+    int attempt = 1;
+
+    while (!activeFiles.isEmpty()) {
+      LOG.info("MergeSplit {} active bulk loaded files. Attempt={}", activeFiles.size(), attempt++);
+      // Active file can be archived during copy operation,
+      // we need to handle this properly
+      try {
+        mergeSplitBulkloads(activeFiles, tn);
+        break;
+      } catch (IOException e) {
+        int numActiveFiles = activeFiles.size();
+        updateFileLists(activeFiles, archiveFiles);
+        if (activeFiles.size() < numActiveFiles) {
+          continue;
         }
+
+        throw e;
       }
-      // If incremental copy will fail for archived files
-      // we will have partially loaded files in backup destination (only files from active data
-      // directory). It is OK, because the backup will marked as FAILED and data will be cleaned up
-      if (archiveFiles.size() > 0) {
-        String[] toCopy = new String[archiveFiles.size()];
-        archiveFiles.toArray(toCopy);
-        incrementalCopyHFiles(toCopy, tgtDest);
-      }
-    } finally {
-      // Disable special mode of BackupDistCp
-      conf.unset(MapReduceBackupCopyJob.NUMBER_OF_LEVELS_TO_PRESERVE_KEY);
+    }
+
+    if (!archiveFiles.isEmpty()) {
+      mergeSplitBulkloads(archiveFiles, tn);
+    }
+  }
+
+  private void mergeSplitBulkloads(List<String> files, TableName tn) throws IOException {
+    MapReduceHFileSplitterJob player = new MapReduceHFileSplitterJob();
+    conf.set(MapReduceHFileSplitterJob.BULK_OUTPUT_CONF_KEY,
+      getBulkOutputDirForTable(tn).toString());
+    player.setConf(conf);
+
+    String inputDirs = StringUtils.join(files, ",");
+    String[] args = { inputDirs, tn.getNameWithNamespaceInclAsString() };
+
+    int result;
+
+    try {
+      result = player.run(args);
+    } catch (Exception e) {
+      LOG.error("Failed to run MapReduceHFileSplitterJob", e);
+      throw new IOException(e);
+    }
+
+    if (result != 0) {
+      throw new IOException(
+        "Failed to run MapReduceHFileSplitterJob with invalid result: " + result);
     }
   }
 
@@ -263,6 +274,7 @@ public void execute() throws IOException, ColumnFamilyMismatchException {
     try {
       // copy out the table and region info files for each table
       BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
+      setupRegionLocator();
       // convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
       convertWALsToHFiles();
       incrementalCopyHFiles(new String[] { getBulkOutputDir().toString() },
@@ -405,6 +417,29 @@ protected void walToHFiles(List<String> dirPaths, List<String> tableList) throws
     }
   }
 
+  private void incrementalCopyBulkloadHFiles(FileSystem tgtFs, TableName tn) throws IOException {
+    Path bulkOutDir = getBulkOutputDirForTable(tn);
+    FileSystem fs = FileSystem.get(conf);
+
+    if (fs.exists(bulkOutDir)) {
+      conf.setInt(MapReduceBackupCopyJob.NUMBER_OF_LEVELS_TO_PRESERVE_KEY, 2);
+      Path tgtPath = getTargetDirForTable(tn);
+      try {
+        RemoteIterator<LocatedFileStatus> locatedFiles = tgtFs.listFiles(bulkOutDir, true);
+        List<String> files = new ArrayList<>();
+        while (locatedFiles.hasNext()) {
+          LocatedFileStatus file = locatedFiles.next();
+          if (file.isFile() && HFile.isHFileFormat(tgtFs, file.getPath())) {
+            files.add(file.getPath().toString());
+          }
+        }
+        incrementalCopyHFiles(files.toArray(files.toArray(new String[0])), tgtPath.toString());
+      } finally {
+        conf.unset(MapReduceBackupCopyJob.NUMBER_OF_LEVELS_TO_PRESERVE_KEY);
+      }
+    }
+  }
+
   protected Path getBulkOutputDirForTable(TableName table) {
     Path tablePath = getBulkOutputDir();
     tablePath = new Path(tablePath, table.getNamespaceAsString());
@@ -420,6 +455,30 @@ protected Path getBulkOutputDir() {
     return path;
   }
 
+  private Path getTargetDirForTable(TableName table) {
+    Path path = new Path(backupInfo.getBackupRootDir() + Path.SEPARATOR + backupInfo.getBackupId());
+    path = new Path(path, table.getNamespaceAsString());
+    path = new Path(path, table.getNameAsString());
+    return path;
+  }
+
+  private void setupRegionLocator() throws IOException {
+    Map<TableName, String> fullBackupIds = getFullBackupIds();
+    try (BackupAdminImpl backupAdmin = new BackupAdminImpl(conn)) {
+
+      for (TableName tableName : backupInfo.getTables()) {
+        String fullBackupId = fullBackupIds.get(tableName);
+        BackupInfo fullBackupInfo = backupAdmin.getBackupInfo(fullBackupId);
+        String snapshotName = fullBackupInfo.getSnapshotName(tableName);
+        Path root = HBackupFileSystem.getTableBackupPath(tableName,
+          new Path(fullBackupInfo.getBackupRootDir()), fullBackupId);
+        String manifestDir =
+          SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, root).toString();
+        SnapshotRegionLocator.setSnapshotManifestDir(conf, manifestDir, tableName);
+      }
+    }
+  }
+
   private Map<TableName, String> getFullBackupIds() throws IOException {
     // Ancestors are stored from newest to oldest, so we can iterate backwards
     // in order to populate our backupId map with the most recent full backup

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java

Lines changed: 9 additions & 6 deletions
@@ -59,6 +59,8 @@ public class RestoreTablesClient {
   private Path restoreRootDir;
   private boolean isOverwrite;
 
+  private boolean isKeepOriginalSplits;
+
   public RestoreTablesClient(Connection conn, RestoreRequest request) throws IOException {
     this.backupRootDir = request.getBackupRootDir();
     this.backupId = request.getBackupId();
@@ -68,6 +70,7 @@ public RestoreTablesClient(Connection conn, RestoreRequest request) throws IOExc
       this.tTableArray = sTableArray;
     }
     this.isOverwrite = request.isOverwrite();
+    this.isKeepOriginalSplits = request.isKeepOriginalSplits();
     this.conn = conn;
     this.conf = conn.getConfiguration();
     if (request.getRestoreRootDir() != null) {
@@ -132,7 +135,7 @@ private void checkTargetTables(TableName[] tTableArray, boolean isOverwrite) thr
    */
 
   private void restoreImages(BackupImage[] images, TableName sTable, TableName tTable,
-    boolean truncateIfExists) throws IOException {
+    boolean truncateIfExists, boolean isKeepOriginalSplits) throws IOException {
     // First image MUST be image of a FULL backup
     BackupImage image = images[0];
     String rootDir = image.getRootDir();
@@ -148,7 +151,7 @@ private void restoreImages(BackupImage[] images, TableName sTable, TableName tTa
         + tableBackupPath.toString());
       conf.set(JOB_NAME_CONF_KEY, "Full_Restore-" + backupId + "-" + tTable);
       restoreTool.fullRestoreTable(conn, tableBackupPath, sTable, tTable, truncateIfExists,
-        lastIncrBackupId);
+        isKeepOriginalSplits, lastIncrBackupId);
       conf.unset(JOB_NAME_CONF_KEY);
     } else { // incremental Backup
       throw new IOException("Unexpected backup type " + image.getType());
@@ -183,7 +186,7 @@ private void restoreImages(BackupImage[] images, TableName sTable, TableName tTa
     dirList.toArray(paths);
     conf.set(JOB_NAME_CONF_KEY, "Incremental_Restore-" + backupId + "-" + tTable);
     restoreTool.incrementalRestoreTable(conn, tableBackupPath, paths, new TableName[] { sTable },
-      new TableName[] { tTable }, lastIncrBackupId);
+      new TableName[] { tTable }, lastIncrBackupId, isKeepOriginalSplits);
     LOG.info(sTable + " has been successfully restored to " + tTable);
   }
 
@@ -208,7 +211,7 @@ private List<Path> getFilesRecursively(String fileBackupDir)
    * @throws IOException exception
    */
   private void restore(BackupManifest manifest, TableName[] sTableArray, TableName[] tTableArray,
-    boolean isOverwrite) throws IOException {
+    boolean isOverwrite, boolean isKeepOriginalSplits) throws IOException {
     TreeSet<BackupImage> restoreImageSet = new TreeSet<>();
 
     for (int i = 0; i < sTableArray.length; i++) {
@@ -223,7 +226,7 @@ private void restore(BackupManifest manifest, TableName[] sTableArray, TableName
       set.addAll(depList);
       BackupImage[] arr = new BackupImage[set.size()];
       set.toArray(arr);
-      restoreImages(arr, table, tTableArray[i], isOverwrite);
+      restoreImages(arr, table, tTableArray[i], isOverwrite, isKeepOriginalSplits);
       restoreImageSet.addAll(list);
       if (restoreImageSet != null && !restoreImageSet.isEmpty()) {
         LOG.info("Restore includes the following image(s):");
@@ -257,6 +260,6 @@ public void execute() throws IOException {
     Path rootPath = new Path(backupRootDir);
     BackupManifest manifest = HBackupFileSystem.getManifest(conf, rootPath, backupId);
 
-    restore(manifest, sTableArray, tTableArray, isOverwrite);
+    restore(manifest, sTableArray, tTableArray, isOverwrite, isKeepOriginalSplits);
   }
 }

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/mapreduce/MapReduceHFileSplitterJob.java

Lines changed: 11 additions & 1 deletion
@@ -35,6 +35,7 @@
 import org.apache.hadoop.hbase.mapreduce.HFileInputFormat;
 import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.snapshot.SnapshotRegionLocator;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.MapReduceExtendedCell;
 import org.apache.hadoop.io.NullWritable;
@@ -118,7 +119,7 @@ public Job createSubmittableJob(String[] args) throws IOException {
     job.setMapOutputValueClass(MapReduceExtendedCell.class);
     try (Connection conn = ConnectionFactory.createConnection(conf);
       Table table = conn.getTable(tableName);
-      RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
+      RegionLocator regionLocator = getRegionLocator(conf, conn, tableName)) {
       HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
     }
     LOG.debug("success configuring load incremental job");
@@ -171,4 +172,13 @@ public int run(String[] args) throws Exception {
     int result = job.waitForCompletion(true) ? 0 : 1;
     return result;
   }
+
+  private static RegionLocator getRegionLocator(Configuration conf, Connection conn,
+    TableName table) throws IOException {
+    if (SnapshotRegionLocator.shouldUseSnapshotRegionLocator(conf, table)) {
+      return SnapshotRegionLocator.create(conf, table);
+    }
+
+    return conn.getRegionLocator(table);
+  }
 }
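
Taken together, these changes mean the incremental backup client registers the full backup's snapshot manifest for each table (setupRegionLocator above) and then runs MapReduceHFileSplitterJob, whose getRegionLocator() now prefers a SnapshotRegionLocator built from that manifest over the live table's regions, so bulk-loaded HFiles are re-split along the original full-backup boundaries. A rough sketch of that wiring follows; the class name, manifest path and staging paths are made-up placeholders, while the HBase calls are the ones shown in this commit.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.mapreduce.MapReduceHFileSplitterJob;
import org.apache.hadoop.hbase.snapshot.SnapshotRegionLocator;

public class SplitterWiringSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName table = TableName.valueOf("ns:demo"); // placeholder table

    // Register the completed-snapshot dir of the table's full backup (placeholder path);
    // IncrementalTableBackupClient.setupRegionLocator() derives the real one via
    // SnapshotDescriptionUtils.getCompletedSnapshotDir().
    SnapshotRegionLocator.setSnapshotManifestDir(conf,
      "/backup/full_backup_id/.hbase-snapshot/snapshot_name", table);

    // Run the splitter: createSubmittableJob() sees the registered manifest dir and
    // uses SnapshotRegionLocator.create(conf, table) instead of conn.getRegionLocator(table).
    MapReduceHFileSplitterJob splitter = new MapReduceHFileSplitterJob();
    conf.set(MapReduceHFileSplitterJob.BULK_OUTPUT_CONF_KEY, "/backup/staging/bulk_output");
    splitter.setConf(conf);
    int exitCode = splitter.run(
      new String[] { "/backup/staging/input_hfiles", table.getNameWithNamespaceInclAsString() });
    System.exit(exitCode);
  }
}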
