Commit 00e4a8c

HBASE-29029 Refactor BackupHFileCleaner + fix test
The TestBackupHFileCleaner test was broken: it registered bulk loads through a different API than the one actually used during backups. The test also incorrectly closed the FS of the HBaseTestingUtil, causing it to block for about 5 minutes during shutdown. Both the test and BackupHFileCleaner itself were overly convoluted and have been cleaned up.
1 parent fbb7080 commit 00e4a8c
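
For context on the shutdown hang described in the commit message: Hadoop's FileSystem.get(Configuration) returns a cached instance that is shared with everything else using the same configuration, including a running mini cluster, so closing it from test code also closes the filesystem the cluster is still using. The sketch below illustrates the pitfall and a safer alternative; the class, method names, and paths are illustrative only and are not taken from TestBackupHFileCleaner.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsCloseSketch {
  // Anti-pattern: FileSystem.get() returns a cached instance shared with the
  // running mini cluster; closing it can make cluster shutdown hang or fail.
  static void brokenCleanup(Configuration conf) throws Exception {
    FileSystem fs = FileSystem.get(conf);
    fs.delete(new Path("/tmp/test-output"), true);
    fs.close(); // do not close the shared, cached instance
  }

  // Safer: when a FileSystem really must be closed, obtain a private instance;
  // closing it leaves the cached instance used by the cluster untouched.
  static void saferCleanup(Configuration conf) throws Exception {
    try (FileSystem fs = FileSystem.newInstance(conf)) {
      fs.delete(new Path("/tmp/test-output"), true);
    }
  }
}

Simply not closing the shared instance at all is also fine in tests, since the caching layer owns its lifecycle.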

File tree

4 files changed: +163 -272 lines changed

hbase-backup/pom.xml

Lines changed: 6 additions & 0 deletions

@@ -112,6 +112,12 @@
       <scope>test</scope>
     </dependency>
     <!-- General dependencies -->
+    <dependency>
+      <groupId>com.github.stephenc.findbugs</groupId>
+      <artifactId>findbugs-annotations</artifactId>
+      <scope>compile</scope>
+      <optional>true</optional>
+    </dependency>
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-lang3</artifactId>
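
The findbugs-annotations artifact provides the edu.umd.cs.findbugs.annotations package; making it a compile-scoped, optional dependency presumably lets main (non-test) code in hbase-backup carry SpotBugs suppressions without forcing the jar onto downstream consumers. A rough illustration of that kind of usage, not code from this commit:

import edu.umd.cs.findbugs.annotations.SuppressWarnings;

public class AnnotationUsageSketch {
  private final byte[] buffer = new byte[16];

  // Suppresses the SpotBugs "exposing internal representation" warning on this
  // getter; value/justification is the usual pattern for such suppressions.
  @SuppressWarnings(value = "EI_EXPOSE_REP", justification = "Illustrative example only")
  public byte[] getBuffer() {
    return buffer;
  }
}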

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/BackupHFileCleaner.java

Lines changed: 49 additions & 75 deletions

@@ -18,19 +18,18 @@
 package org.apache.hadoop.hbase.backup;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
 import java.util.Set;
+import org.apache.commons.lang3.NotImplementedException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
+import org.apache.hadoop.hbase.backup.impl.BulkLoad;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;
@@ -42,106 +41,81 @@
 import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
 
 /**
- * Implementation of a file cleaner that checks if an hfile is still referenced by backup before
- * deleting it from hfile archive directory.
+ * File cleaner that prevents deletion of HFiles that are still required by future incremental
+ * backups.
+ * <p>
+ * Bulk loaded HFiles that are needed by future updates are stored in the backup system table.
  */
 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
 public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abortable {
   private static final Logger LOG = LoggerFactory.getLogger(BackupHFileCleaner.class);
+
   private boolean stopped = false;
-  private boolean aborted;
-  private Configuration conf;
+  private boolean aborted = false;
   private Connection connection;
-  private long prevReadFromBackupTbl = 0, // timestamp of most recent read from backup:system table
-    secondPrevReadFromBackupTbl = 0; // timestamp of 2nd most recent read from backup:system table
-  // used by unit test to skip reading backup:system
-  private boolean checkForFullyBackedUpTables = true;
-  private List<TableName> fullyBackedUpTables = null;
-
-  private Set<String> getFilenameFromBulkLoad(Map<byte[], List<Path>>[] maps) {
-    Set<String> filenames = new HashSet<>();
-    for (Map<byte[], List<Path>> map : maps) {
-      if (map == null) {
-        continue;
-      }
-
-      for (List<Path> paths : map.values()) {
-        for (Path p : paths) {
-          filenames.add(p.getName());
-        }
-      }
-    }
-    return filenames;
-  }
-
-  private Set<String> loadHFileRefs(List<TableName> tableList) throws IOException {
-    if (connection == null) {
-      connection = ConnectionFactory.createConnection(conf);
-    }
-    try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
-      Map<byte[], List<Path>>[] res = tbl.readBulkLoadedFiles(null, tableList);
-      secondPrevReadFromBackupTbl = prevReadFromBackupTbl;
-      prevReadFromBackupTbl = EnvironmentEdgeManager.currentTime();
-      return getFilenameFromBulkLoad(res);
-    }
-  }
-
-  @InterfaceAudience.Private
-  void setCheckForFullyBackedUpTables(boolean b) {
-    checkForFullyBackedUpTables = b;
-  }
+  // timestamp of most recent read from backup system table
+  private long prevReadFromBackupTbl = 0;
+  // timestamp of 2nd most recent read from backup system table
+  private long secondPrevReadFromBackupTbl = 0;
 
   @Override
   public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
-    if (conf == null) {
-      return files;
+    if (stopped) {
+      return Collections.emptyList();
     }
-    // obtain the Set of TableName's which have been fully backed up
-    // so that we filter BulkLoad to be returned from server
-    if (checkForFullyBackedUpTables) {
-      if (connection == null) {
-        return files;
-      }
 
-      try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
-        fullyBackedUpTables = new ArrayList<>(tbl.getTablesIncludedInBackups());
-      } catch (IOException ioe) {
-        LOG.error("Failed to get tables which have been fully backed up, skipping checking", ioe);
-        return Collections.emptyList();
+    // We use filenames because the HFile will have been moved to the archive since it
+    // was registered.
+    final Set<String> hfileFilenames = new HashSet<>();
+    try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
+      Set<TableName> tablesIncludedInBackups = fetchFullyBackedUpTables(tbl);
+      for (BulkLoad bulkLoad : tbl.readBulkloadRows(tablesIncludedInBackups)) {
+        hfileFilenames.add(new Path(bulkLoad.getHfilePath()).getName());
       }
-      Collections.sort(fullyBackedUpTables);
-    }
-    final Set<String> hfileRefs;
-    try {
-      hfileRefs = loadHFileRefs(fullyBackedUpTables);
+      LOG.debug("Found {} unique HFile filenames registered as bulk loads.", hfileFilenames.size());
     } catch (IOException ioe) {
-      LOG.error("Failed to read hfile references, skipping checking deletable files", ioe);
+      LOG.error(
+        "Failed to read registered bulk load references from backup system table, marking all files as non-deletable.",
+        ioe);
       return Collections.emptyList();
     }
-    Iterable<FileStatus> deletables = Iterables.filter(files, file -> {
-      // If the file is recent, be conservative and wait for one more scan of backup:system table
+
+    secondPrevReadFromBackupTbl = prevReadFromBackupTbl;
+    prevReadFromBackupTbl = EnvironmentEdgeManager.currentTime();
+
+    return Iterables.filter(files, file -> {
+      // If the file is recent, be conservative and wait for one more scan of the bulk loads
       if (file.getModificationTime() > secondPrevReadFromBackupTbl) {
+        LOG.debug("Preventing deletion due to timestamp: {}", file.getPath().toString());
        return false;
      }
+      // A file can be deleted if it is not registered as a backup bulk load.
      String hfile = file.getPath().getName();
-      boolean foundHFileRef = hfileRefs.contains(hfile);
-      return !foundHFileRef;
+      if (hfileFilenames.contains(hfile)) {
+        LOG.debug("Preventing deletion due to bulk load registration in backup system table: {}",
+          file.getPath().toString());
+        return false;
+      } else {
+        LOG.debug("OK to delete: {}", file.getPath().toString());
+        return true;
+      }
    });
-    return deletables;
+  }
+
+  protected Set<TableName> fetchFullyBackedUpTables(BackupSystemTable tbl) throws IOException {
+    return tbl.getTablesIncludedInBackups();
  }
 
  @Override
  public boolean isFileDeletable(FileStatus fStat) {
-    // work is done in getDeletableFiles()
-    return true;
+    throw new NotImplementedException("This method should not be called");
  }
 
  @Override
  public void setConf(Configuration config) {
-    this.conf = config;
    this.connection = null;
    try {
-      this.connection = ConnectionFactory.createConnection(conf);
+      this.connection = ConnectionFactory.createConnection(config);
    } catch (IOException ioe) {
      LOG.error("Couldn't establish connection", ioe);
    }
@@ -156,7 +130,7 @@ public void stop(String why) {
      try {
        this.connection.close();
      } catch (IOException ioe) {
-        LOG.debug("Got " + ioe + " when closing connection");
+        LOG.debug("Got IOException when closing connection", ioe);
      }
    }
    this.stopped = true;
@@ -169,7 +143,7 @@ public boolean isStopped() {
 
  @Override
  public void abort(String why, Throwable e) {
-    LOG.warn("Aborting ReplicationHFileCleaner because " + why, e);
+    LOG.warn("Aborting ReplicationHFileCleaner because {}", why, e);
    this.aborted = true;
    stop(why);
  }
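
For reference, a delegate like BackupHFileCleaner only takes effect once it is registered as an HFile cleaner plugin on the master. The sketch below shows the generic wiring through the hbase.master.hfilecleaner.plugins property; this is standard HBase cleaner-plugin configuration rather than anything introduced by this commit, and a real deployment would normally keep the default cleaners in the list as well.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.backup.BackupHFileCleaner;

public class CleanerPluginWiringSketch {
  public static Configuration withBackupHFileCleaner() {
    Configuration conf = HBaseConfiguration.create();
    // The master instantiates every class listed in this comma-separated property
    // as a BaseHFileCleanerDelegate and consults each delegate before deleting an
    // archived HFile; BackupHFileCleaner vetoes files still needed by backups.
    conf.set("hbase.master.hfilecleaner.plugins", BackupHFileCleaner.class.getName());
    return conf;
  }
}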
