Skip to content

Commit 07288fa

Browse files
committed
HBASE-22627 Port HBASE-22617 (Recovered WAL directories not getting cleaned up) to branch-1 (#339)
HBASE-22617 Recovered WAL directories not getting cleaned up (Duo Zhang) Signed-off-by: Zach York <zyork@apache.org>
1 parent 4796608 commit 07288fa

File tree

18 files changed

+132
-118
lines changed

18 files changed

+132
-118
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,8 @@
3232
import org.apache.hadoop.fs.FileSystem;
3333
import org.apache.hadoop.fs.Path;
3434
import org.apache.hadoop.fs.PathFilter;
35-
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
3635
import org.apache.hadoop.hbase.HRegionInfo;
3736
import org.apache.hadoop.hbase.classification.InterfaceAudience;
38-
import org.apache.hadoop.hbase.regionserver.HRegion;
3937
import org.apache.hadoop.hbase.regionserver.StoreFile;
4038
import org.apache.hadoop.hbase.util.Bytes;
4139
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -85,7 +83,7 @@ public static void archiveRegion(Configuration conf, FileSystem fs, HRegionInfo
8583
throws IOException {
8684
Path rootDir = FSUtils.getRootDir(conf);
8785
archiveRegion(fs, rootDir, FSUtils.getTableDir(rootDir, info.getTable()),
88-
HRegion.getRegionDir(rootDir, info));
86+
FSUtils.getRegionDirFromRootDir(rootDir, info));
8987
}
9088

9189
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import org.apache.hadoop.hbase.TableName;
3232
import org.apache.hadoop.hbase.HConstants;
3333
import org.apache.hadoop.hbase.HRegionInfo;
34-
import org.apache.hadoop.hbase.regionserver.HRegion;
3534
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
3635
import org.apache.hadoop.hbase.util.FSUtils;
3736
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
@@ -486,7 +485,7 @@ public static Path getHFileFromBackReference(final Path rootDir, final Path link
486485
String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
487486
regionPath.getName(), hfileName);
488487
Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
489-
Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
488+
Path regionDir = new Path(linkTableDir, linkRegionName);
490489
return new Path(new Path(regionDir, familyPath.getName()), linkName);
491490
}
492491

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ public boolean checkFileSystem() {
212212
return this.walFsOk;
213213
}
214214

215-
protected FileSystem getWALFileSystem() {
215+
public FileSystem getWALFileSystem() {
216216
return this.walFs;
217217
}
218218

@@ -689,6 +689,4 @@ public void archiveMetaLog(final ServerName serverName) {
689689
LOG.warn("Failed archiving meta log for server " + serverName, ie);
690690
}
691691
}
692-
693-
694692
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
import org.apache.hadoop.hbase.client.Scan;
4545
import org.apache.hadoop.hbase.client.Table;
4646
import org.apache.hadoop.hbase.exceptions.HBaseException;
47-
import org.apache.hadoop.hbase.regionserver.HRegion;
4847
import org.apache.hadoop.hbase.master.AssignmentManager;
4948
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
5049
import org.apache.hadoop.hbase.master.MasterFileSystem;
@@ -336,7 +335,7 @@ protected static void deleteFromFs(final MasterProcedureEnv env,
336335
for (HRegionInfo hri : regions) {
337336
LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS");
338337
HFileArchiver.archiveRegion(fs, mfs.getRootDir(),
339-
tempTableDir, HRegion.getRegionDir(tempTableDir, hri.getEncodedName()));
338+
tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
340339
}
341340
LOG.debug("Table '" + tableName + "' archived!");
342341
}

hbase-server/src/main/java/org/apache/hadoop/hbase/migration/NamespaceUpgrade.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,7 @@ public void migrateMeta() throws IOException {
319319
}
320320

321321
// Since meta table name has changed rename meta region dir from it's old encoding to new one
322-
Path oldMetaRegionDir = HRegion.getRegionDir(rootDir,
323-
new Path(newMetaDir, "1028785192").toString());
322+
Path oldMetaRegionDir = new Path(rootDir, new Path(newMetaDir, "1028785192").toString());
324323
if (fs.exists(oldMetaRegionDir)) {
325324
LOG.info("Migrating meta region " + oldMetaRegionDir + " to " + newMetaRegionDir);
326325
if (!fs.rename(oldMetaRegionDir, newMetaRegionDir)) {

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java

Lines changed: 50 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
import com.google.common.annotations.VisibleForTesting;
2222
import com.google.common.base.Optional;
2323
import com.google.common.base.Preconditions;
24+
import com.google.common.collect.Iterables;
2425
import com.google.common.collect.Lists;
2526
import com.google.common.collect.Maps;
27+
import com.google.common.collect.Sets;
2628
import com.google.common.io.Closeables;
2729
import com.google.protobuf.ByteString;
2830
import com.google.protobuf.Descriptors;
@@ -4163,7 +4165,7 @@ private void removeNonExistentColumnFamilyForReplay(
41634165
if (nonExistentList != null) {
41644166
for (byte[] family : nonExistentList) {
41654167
// Perhaps schema was changed between crash and replay
4166-
LOG.info("No family for " + Bytes.toString(family) + " omit from reply.");
4168+
LOG.info("No family for " + Bytes.toString(family) + " omit from replay.");
41674169
familyMap.remove(family);
41684170
}
41694171
}
@@ -4276,62 +4278,76 @@ protected long replayRecoveredEditsIfAny(Map<byte[], Long> maxSeqIdInStores,
42764278
minSeqIdForTheRegion = maxSeqIdInStore;
42774279
}
42784280
}
4279-
long seqid = minSeqIdForTheRegion;
4281+
long seqId = minSeqIdForTheRegion;
42804282

42814283
FileSystem walFS = getWalFileSystem();
4282-
Path regionDir = getWALRegionDir();
42834284
FileSystem rootFS = getFilesystem();
4284-
Path defaultRegionDir = getRegionDir(FSUtils.getRootDir(conf), getRegionInfo());
4285+
Path regionDir = FSUtils.getRegionDirFromRootDir(FSUtils.getRootDir(conf), getRegionInfo());
4286+
Path regionWALDir = getWALRegionDir();
4287+
Path wrongRegionWALDir = FSUtils.getWrongWALRegionDir(conf, getRegionInfo().getTable(),
4288+
getRegionInfo().getEncodedName());
42854289

4290+
// We made a mistake in HBASE-20734 so we need to do this dirty hack...
4291+
NavigableSet<Path> filesUnderWrongRegionWALDir =
4292+
WALSplitter.getSplitEditFilesSorted(walFS, wrongRegionWALDir);
4293+
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
4294+
filesUnderWrongRegionWALDir, reporter, regionDir));
42864295
// This is to ensure backwards compatability with HBASE-20723 where recovered edits can appear
42874296
// under the root dir even if walDir is set.
4288-
NavigableSet<Path> filesUnderRootDir = null;
4289-
if (!regionDir.equals(defaultRegionDir)) {
4290-
filesUnderRootDir =
4291-
WALSplitter.getSplitEditFilesSorted(rootFS, defaultRegionDir);
4292-
seqid = Math.max(seqid,
4293-
replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS, filesUnderRootDir, reporter,
4294-
defaultRegionDir));
4295-
}
4296-
NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(walFS, regionDir);
4297-
seqid = Math.max(seqid, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
4298-
files, reporter, regionDir));
4299-
4300-
if (seqid > minSeqIdForTheRegion) {
4297+
NavigableSet<Path> filesUnderRootDir = Sets.newTreeSet();
4298+
if (!regionWALDir.equals(regionDir)) {
4299+
filesUnderRootDir = WALSplitter.getSplitEditFilesSorted(rootFS, regionDir);
4300+
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS,
4301+
filesUnderRootDir, reporter, regionDir));
4302+
}
4303+
NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(walFS, regionWALDir);
4304+
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
4305+
files, reporter, regionWALDir));
4306+
if (seqId > minSeqIdForTheRegion) {
43014307
// Then we added some edits to memory. Flush and cleanup split edit files.
4302-
internalFlushcache(null, seqid, stores.values(), status, false);
4308+
internalFlushcache(null, seqId, stores.values(), status, false);
43034309
}
4304-
// Now delete the content of recovered edits. We're done w/ them.
4305-
if (files.size() > 0 && this.conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
4310+
// Now delete the content of recovered edits. We're done w/ them.
4311+
if (conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
43064312
// For debugging data loss issues!
43074313
// If this flag is set, make use of the hfile archiving by making recovered.edits a fake
43084314
// column family. Have to fake out file type too by casting our recovered.edits as storefiles
4309-
String fakeFamilyName = WALSplitter.getRegionDirRecoveredEditsDir(regionDir).getName();
4310-
Set<StoreFile> fakeStoreFiles = new HashSet<>(files.size());
4311-
for (Path file: files) {
4312-
fakeStoreFiles.add(
4313-
new StoreFile(walFS, file, this.conf, null, null));
4315+
String fakeFamilyName = WALSplitter.getRegionDirRecoveredEditsDir(regionWALDir).getName();
4316+
Set<StoreFile> fakeStoreFiles = new HashSet<>();
4317+
for (Path file: Iterables.concat(files, filesUnderWrongRegionWALDir)) {
4318+
fakeStoreFiles.add(new StoreFile(walFS, file, conf, null, null));
4319+
}
4320+
for (Path file: filesUnderRootDir) {
4321+
fakeStoreFiles.add(new StoreFile(rootFS, file, conf, null, null));
43144322
}
43154323
getRegionWALFileSystem().removeStoreFiles(fakeFamilyName, fakeStoreFiles);
43164324
} else {
4317-
if (filesUnderRootDir != null) {
4318-
for (Path file : filesUnderRootDir) {
4319-
if (!rootFS.delete(file, false)) {
4320-
LOG.error("Failed delete of {} under root directory." + file);
4321-
} else {
4322-
LOG.debug("Deleted recovered.edits root directory file=" + file);
4323-
}
4325+
for (Path file : filesUnderRootDir) {
4326+
if (!rootFS.delete(file, false)) {
4327+
LOG.error("Failed delete of " + file + " from under the root directory");
4328+
} else {
4329+
LOG.debug("Deleted recovered.edits under root directory, file=" + file);
43244330
}
43254331
}
4326-
for (Path file: files) {
4332+
for (Path file : Iterables.concat(files, filesUnderWrongRegionWALDir)) {
43274333
if (!walFS.delete(file, false)) {
43284334
LOG.error("Failed delete of " + file);
43294335
} else {
43304336
LOG.debug("Deleted recovered.edits file=" + file);
43314337
}
43324338
}
43334339
}
4334-
return seqid;
4340+
4341+
// We have replayed all the recovered edits. Let's delete the wrong directories introduced
4342+
// in HBASE-20734, see HBASE-22617 for more details.
4343+
FileSystem walFs = getWalFileSystem();
4344+
if (walFs.exists(wrongRegionWALDir)) {
4345+
if (!walFs.delete(wrongRegionWALDir, true)) {
4346+
LOG.warn("Unable to delete " + wrongRegionWALDir);
4347+
}
4348+
}
4349+
4350+
return seqId;
43354351
}
43364352

43374353
private long replayRecoveredEditsForPaths(long minSeqIdForTheRegion, FileSystem fs,
@@ -7196,34 +7212,6 @@ public static void addRegionToMETA(final HRegion meta, final HRegion r) throws I
71967212
meta.put(row, HConstants.CATALOG_FAMILY, cells);
71977213
}
71987214

7199-
/**
7200-
* Computes the Path of the HRegion
7201-
*
7202-
* @param tabledir qualified path for table
7203-
* @param name ENCODED region name
7204-
* @return Path of HRegion directory
7205-
* @deprecated For tests only; to be removed.
7206-
*/
7207-
@Deprecated
7208-
public static Path getRegionDir(final Path tabledir, final String name) {
7209-
return new Path(tabledir, name);
7210-
}
7211-
7212-
/**
7213-
* Computes the Path of the HRegion
7214-
*
7215-
* @param rootdir qualified path of HBase root directory
7216-
* @param info HRegionInfo for the region
7217-
* @return qualified path of region directory
7218-
* @deprecated For tests only; to be removed.
7219-
*/
7220-
@Deprecated
7221-
@VisibleForTesting
7222-
public static Path getRegionDir(final Path rootdir, final HRegionInfo info) {
7223-
return new Path(
7224-
FSUtils.getTableDir(rootdir, info.getTable()), info.getEncodedName());
7225-
}
7226-
72277215
/**
72287216
* Determines if the specified row is within the row range specified by the
72297217
* specified HRegionInfo

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -610,19 +610,26 @@ Path commitDaughterRegion(final HRegionInfo regionInfo)
610610
/**
611611
* Create the region splits directory.
612612
*/
613-
void createSplitsDir() throws IOException {
613+
void createSplitsDir(HRegionInfo daughterA, HRegionInfo daughterB) throws IOException {
614614
Path splitdir = getSplitsDir();
615615
if (fs.exists(splitdir)) {
616616
LOG.info("The " + splitdir + " directory exists. Hence deleting it to recreate it");
617617
if (!deleteDir(splitdir)) {
618-
throw new IOException("Failed deletion of " + splitdir
619-
+ " before creating them again.");
618+
throw new IOException("Failed deletion of " + splitdir + " before creating them again.");
620619
}
621620
}
622621
// splitDir doesn't exists now. No need to do an exists() call for it.
623622
if (!createDir(splitdir)) {
624623
throw new IOException("Failed create of " + splitdir);
625624
}
625+
Path daughterATmpDir = getSplitsDir(daughterA);
626+
if (!createDir(daughterATmpDir)) {
627+
throw new IOException("Failed create of " + daughterATmpDir);
628+
}
629+
Path daughterBTmpDir = getSplitsDir(daughterB);
630+
if (!createDir(daughterBTmpDir)) {
631+
throw new IOException("Failed create of " + daughterBTmpDir);
632+
}
626633
}
627634

628635
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransactionImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ public PairOfSameType<Region> stepsBeforePONR(final Server server,
384384
hri_b, std);
385385
}
386386

387-
this.parent.getRegionFileSystem().createSplitsDir();
387+
this.parent.getRegionFileSystem().createSplitsDir(hri_a, hri_b);
388388

389389
transition(SplitTransactionPhase.CREATE_SPLIT_DIR);
390390

hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -648,8 +648,9 @@ private void restoreReferenceFile(final Path familyDir, final HRegionInfo region
648648
if (linkPath != null) {
649649
in = HFileLink.buildFromHFileLinkPattern(conf, linkPath).open(fs);
650650
} else {
651-
linkPath = new Path(new Path(HRegion.getRegionDir(snapshotManifest.getSnapshotDir(),
652-
regionInfo.getEncodedName()), familyDir.getName()), hfileName);
651+
linkPath = new Path(new Path(new Path(snapshotManifest.getSnapshotDir(),
652+
regionInfo.getEncodedName()),
653+
familyDir.getName()), hfileName);
653654
in = fs.open(linkPath);
654655
}
655656
OutputStream out = fs.create(outPath);

hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@
9090
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
9191
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
9292
import org.apache.hadoop.hbase.protobuf.generated.FSProtos;
93-
import org.apache.hadoop.hbase.regionserver.HRegion;
9493
import org.apache.hadoop.hdfs.DistributedFileSystem;
9594
import org.apache.hadoop.io.IOUtils;
9695
import org.apache.hadoop.io.SequenceFile;
@@ -1094,26 +1093,36 @@ private static boolean isValidWALRootDir(Path walDir, final Configuration c) thr
10941093
* @return the region directory used to store WALs under the WALRootDir
10951094
* @throws IOException if there is an exception determining the WALRootDir
10961095
*/
1097-
public static Path getWALRegionDir(final Configuration conf,
1098-
final HRegionInfo regionInfo)
1096+
public static Path getWALRegionDir(final Configuration conf, final HRegionInfo regionInfo)
10991097
throws IOException {
11001098
return new Path(getWALTableDir(conf, regionInfo.getTable()),
11011099
regionInfo.getEncodedName());
11021100
}
11031101

1102+
/**
1103+
* Returns the WAL region directory based on the region info
1104+
* @param conf configuration to determine WALRootDir
1105+
* @param tableName the table name
1106+
* @param encodedRegionName the encoded region name
1107+
* @return the region directory used to store WALs under the WALRootDir
1108+
* @throws IOException if there is an exception determining the WALRootDir
1109+
*/
1110+
public static Path getWALRegionDir(final Configuration conf, final TableName tableName,
1111+
final String encodedRegionName) throws IOException {
1112+
return new Path(getWALTableDir(conf, tableName), encodedRegionName);
1113+
}
1114+
11041115
/**
11051116
* Checks if meta region exists
11061117
*
11071118
* @param fs file system
1108-
* @param rootdir root directory of HBase installation
1119+
* @param rootDir root directory of HBase installation
11091120
* @return true if exists
11101121
* @throws IOException e
11111122
*/
11121123
@SuppressWarnings("deprecation")
1113-
public static boolean metaRegionExists(FileSystem fs, Path rootdir)
1114-
throws IOException {
1115-
Path metaRegionDir =
1116-
HRegion.getRegionDir(rootdir, HRegionInfo.FIRST_META_REGIONINFO);
1124+
public static boolean metaRegionExists(FileSystem fs, Path rootDir) throws IOException {
1125+
Path metaRegionDir = getRegionDirFromRootDir(rootDir, HRegionInfo.FIRST_META_REGIONINFO);
11171126
return fs.exists(metaRegionDir);
11181127
}
11191128

@@ -1260,8 +1269,22 @@ public static Path getTableDir(Path rootdir, final TableName tableName) {
12601269
*/
12611270
public static Path getWALTableDir(final Configuration conf, final TableName tableName)
12621271
throws IOException {
1263-
return new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
1264-
tableName.getQualifierAsString());
1272+
Path baseDir = new Path(getWALRootDir(conf), HConstants.BASE_NAMESPACE_DIR);
1273+
return new Path(new Path(baseDir, tableName.getNamespaceAsString()),
1274+
tableName.getQualifierAsString());
1275+
}
1276+
1277+
/**
1278+
* For backward compatibility with HBASE-20734, where we store recovered edits in a wrong
1279+
* directory without BASE_NAMESPACE_DIR. See HBASE-22617 for more details.
1280+
* @deprecated For compatibility, will be removed in 4.0.0.
1281+
*/
1282+
@Deprecated
1283+
public static Path getWrongWALRegionDir(final Configuration conf, final TableName tableName,
1284+
final String encodedRegionName) throws IOException {
1285+
Path wrongTableDir = new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
1286+
tableName.getQualifierAsString());
1287+
return new Path(wrongTableDir, encodedRegionName);
12651288
}
12661289

12671290
/**
@@ -1507,6 +1530,14 @@ protected boolean accept(Path p, @CheckForNull Boolean isDir) {
15071530
}
15081531
}
15091532

1533+
public static Path getRegionDirFromRootDir(Path rootDir, HRegionInfo region) {
1534+
return getRegionDirFromTableDir(getTableDir(rootDir, region.getTable()), region);
1535+
}
1536+
1537+
public static Path getRegionDirFromTableDir(Path tableDir, HRegionInfo region) {
1538+
return new Path(tableDir, ServerRegionReplicaUtil.getRegionInfoForFs(region).getEncodedName());
1539+
}
1540+
15101541
/**
15111542
* Given a particular table dir, return all the regiondirs inside it, excluding files such as
15121543
* .tableinfo

0 commit comments

Comments (0)