Commit 13d1c23
HBASE-22627 Port HBASE-22617 (Recovered WAL directories not getting cleaned up) to branch-1 (#339)
HBASE-22617 Recovered WAL directories not getting cleaned up (Duo Zhang)

Signed-off-by: Zach York <zyork@apache.org>
1 parent e77ebb7 commit 13d1c23

File tree

17 files changed: +143 −128 lines

hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java

Lines changed: 1 addition & 3 deletions

@@ -32,10 +32,8 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -85,7 +83,7 @@ public static void archiveRegion(Configuration conf, FileSystem fs, HRegionInfo
       throws IOException {
     Path rootDir = FSUtils.getRootDir(conf);
     archiveRegion(fs, rootDir, FSUtils.getTableDir(rootDir, info.getTable()),
-      HRegion.getRegionDir(rootDir, info));
+      FSUtils.getRegionDirFromRootDir(rootDir, info));
   }

   /**
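Note: FSUtils.getRegionDirFromRootDir replaces the deprecated HRegion.getRegionDir helpers that this commit removes (see the HRegion.java hunk below). Judging by the removed method bodies, both simply join the table directory and the encoded region name. A minimal standalone sketch of that path shape, assuming the standard branch-1 layout <rootdir>/data/<namespace>/<table>/<encodedName> (illustrative only, not the HBase implementation):

import org.apache.hadoop.fs.Path;

public class RegionDirSketch {
  // Region dir under the root dir = table dir + encoded region name.
  static Path regionDirFromRootDir(Path rootDir, String namespace, String qualifier,
      String encodedName) {
    Path tableDir = new Path(new Path(new Path(rootDir, "data"), namespace), qualifier);
    return new Path(tableDir, encodedName);
  }

  public static void main(String[] args) {
    // Prints hdfs://nn/hbase/data/default/t1/abcdef0123456789abcdef0123456789
    System.out.println(regionDirFromRootDir(new Path("hdfs://nn/hbase"),
        "default", "t1", "abcdef0123456789abcdef0123456789"));
  }
}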

hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java

Lines changed: 1 addition & 2 deletions

@@ -31,7 +31,6 @@
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
@@ -486,7 +485,7 @@ public static Path getHFileFromBackReference(final Path rootDir, final Path link
     String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
       regionPath.getName(), hfileName);
     Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
-    Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
+    Path regionDir = new Path(linkTableDir, linkRegionName);
     return new Path(new Path(regionDir, familyPath.getName()), linkName);
   }

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java

Lines changed: 1 addition & 3 deletions

@@ -212,7 +212,7 @@ public boolean checkFileSystem() {
     return this.walFsOk;
   }

-  protected FileSystem getWALFileSystem() {
+  public FileSystem getWALFileSystem() {
     return this.walFs;
   }

@@ -676,6 +676,4 @@ public void archiveMetaLog(final ServerName serverName) {
       LOG.warn("Failed archiving meta log for server " + serverName, ie);
     }
   }
-
-
 }

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java

Lines changed: 1 addition & 2 deletions

@@ -44,7 +44,6 @@
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.exceptions.HBaseException;
-import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.master.AssignmentManager;
 import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
 import org.apache.hadoop.hbase.master.MasterFileSystem;
@@ -336,7 +335,7 @@ protected static void deleteFromFs(final MasterProcedureEnv env,
     for (HRegionInfo hri : regions) {
       LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS");
       HFileArchiver.archiveRegion(fs, mfs.getRootDir(),
-        tempTableDir, HRegion.getRegionDir(tempTableDir, hri.getEncodedName()));
+        tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
     }
     LOG.debug("Table '" + tableName + "' archived!");
   }

hbase-server/src/main/java/org/apache/hadoop/hbase/migration/NamespaceUpgrade.java

Lines changed: 1 addition & 2 deletions

@@ -319,8 +319,7 @@ public void migrateMeta() throws IOException {
     }

     // Since meta table name has changed rename meta region dir from it's old encoding to new one
-    Path oldMetaRegionDir = HRegion.getRegionDir(rootDir,
-      new Path(newMetaDir, "1028785192").toString());
+    Path oldMetaRegionDir = new Path(rootDir, new Path(newMetaDir, "1028785192").toString());
     if (fs.exists(oldMetaRegionDir)) {
       LOG.info("Migrating meta region " + oldMetaRegionDir + " to " + newMetaRegionDir);
       if (!fs.rename(oldMetaRegionDir, newMetaRegionDir)) {

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java

Lines changed: 64 additions & 76 deletions

@@ -18,6 +18,22 @@
  */
 package org.apache.hadoop.hbase.regionserver;

+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Optional;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import com.google.common.io.Closeables;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.Descriptors;
+import com.google.protobuf.Message;
+import com.google.protobuf.RpcCallback;
+import com.google.protobuf.RpcController;
+import com.google.protobuf.Service;
+import com.google.protobuf.TextFormat;
+
 import java.io.EOFException;
 import java.io.FileNotFoundException;
 import java.io.IOException;
@@ -187,20 +203,6 @@
 import org.apache.htrace.Trace;
 import org.apache.htrace.TraceScope;

-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Optional;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.io.Closeables;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.Descriptors;
-import com.google.protobuf.Message;
-import com.google.protobuf.RpcCallback;
-import com.google.protobuf.RpcController;
-import com.google.protobuf.Service;
-import com.google.protobuf.TextFormat;
-
 @InterfaceAudience.Private
 public class HRegion implements HeapSize, PropagatingConfigurationObserver, Region {
   private static final Log LOG = LogFactory.getLog(HRegion.class);
@@ -4033,7 +4035,7 @@ private void removeNonExistentColumnFamilyForReplay(
     if (nonExistentList != null) {
       for (byte[] family : nonExistentList) {
         // Perhaps schema was changed between crash and replay
-        LOG.info("No family for " + Bytes.toString(family) + " omit from reply.");
+        LOG.info("No family for " + Bytes.toString(family) + " omit from replay.");
         familyMap.remove(family);
       }
     }
@@ -4146,62 +4148,76 @@ protected long replayRecoveredEditsIfAny(Map<byte[], Long> maxSeqIdInStores,
         minSeqIdForTheRegion = maxSeqIdInStore;
       }
     }
-    long seqid = minSeqIdForTheRegion;
+    long seqId = minSeqIdForTheRegion;

     FileSystem walFS = getWalFileSystem();
-    Path regionDir = getWALRegionDir();
     FileSystem rootFS = getFilesystem();
-    Path defaultRegionDir = getRegionDir(FSUtils.getRootDir(conf), getRegionInfo());
+    Path regionDir = FSUtils.getRegionDirFromRootDir(FSUtils.getRootDir(conf), getRegionInfo());
+    Path regionWALDir = getWALRegionDir();
+    Path wrongRegionWALDir = FSUtils.getWrongWALRegionDir(conf, getRegionInfo().getTable(),
+      getRegionInfo().getEncodedName());

+    // We made a mistake in HBASE-20734 so we need to do this dirty hack...
+    NavigableSet<Path> filesUnderWrongRegionWALDir =
+      WALSplitter.getSplitEditFilesSorted(walFS, wrongRegionWALDir);
+    seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
+      filesUnderWrongRegionWALDir, reporter, regionDir));
     // This is to ensure backwards compatability with HBASE-20723 where recovered edits can appear
     // under the root dir even if walDir is set.
-    NavigableSet<Path> filesUnderRootDir = null;
-    if (!regionDir.equals(defaultRegionDir)) {
-      filesUnderRootDir =
-        WALSplitter.getSplitEditFilesSorted(rootFS, defaultRegionDir);
-      seqid = Math.max(seqid,
-        replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS, filesUnderRootDir, reporter,
-          defaultRegionDir));
-    }
-    NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(walFS, regionDir);
-    seqid = Math.max(seqid, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
-      files, reporter, regionDir));
-
-    if (seqid > minSeqIdForTheRegion) {
+    NavigableSet<Path> filesUnderRootDir = Sets.newTreeSet();
+    if (!regionWALDir.equals(regionDir)) {
+      filesUnderRootDir = WALSplitter.getSplitEditFilesSorted(rootFS, regionDir);
+      seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS,
+        filesUnderRootDir, reporter, regionDir));
+    }
+    NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(walFS, regionWALDir);
+    seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
+      files, reporter, regionWALDir));
+    if (seqId > minSeqIdForTheRegion) {
       // Then we added some edits to memory. Flush and cleanup split edit files.
-      internalFlushcache(null, seqid, stores.values(), status, false);
+      internalFlushcache(null, seqId, stores.values(), status, false);
     }
-    // Now delete the content of recovered edits.  We're done w/ them.
-    if (files.size() > 0 && this.conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
+    // Now delete the content of recovered edits.  We're done w/ them.
+    if (conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
       // For debugging data loss issues!
       // If this flag is set, make use of the hfile archiving by making recovered.edits a fake
       // column family. Have to fake out file type too by casting our recovered.edits as storefiles
-      String fakeFamilyName = WALSplitter.getRegionDirRecoveredEditsDir(regionDir).getName();
-      Set<StoreFile> fakeStoreFiles = new HashSet<>(files.size());
-      for (Path file: files) {
-        fakeStoreFiles.add(
-          new StoreFile(walFS, file, this.conf, null, null));
+      String fakeFamilyName = WALSplitter.getRegionDirRecoveredEditsDir(regionWALDir).getName();
+      Set<StoreFile> fakeStoreFiles = new HashSet<>();
+      for (Path file: Iterables.concat(files, filesUnderWrongRegionWALDir)) {
+        fakeStoreFiles.add(new StoreFile(walFS, file, conf, null, null));
+      }
+      for (Path file: filesUnderRootDir) {
+        fakeStoreFiles.add(new StoreFile(rootFS, file, conf, null, null));
      }
      getRegionWALFileSystem().removeStoreFiles(fakeFamilyName, fakeStoreFiles);
    } else {
-      if (filesUnderRootDir != null) {
-        for (Path file : filesUnderRootDir) {
-          if (!rootFS.delete(file, false)) {
-            LOG.error("Failed delete of {} under root directory." + file);
-          } else {
-            LOG.debug("Deleted recovered.edits root directory file=" + file);
-          }
+      for (Path file : filesUnderRootDir) {
+        if (!rootFS.delete(file, false)) {
+          LOG.error("Failed delete of " + file + " from under the root directory");
+        } else {
+          LOG.debug("Deleted recovered.edits under root directory, file=" + file);
        }
      }
-      for (Path file: files) {
+      for (Path file : Iterables.concat(files, filesUnderWrongRegionWALDir)) {
        if (!walFS.delete(file, false)) {
          LOG.error("Failed delete of " + file);
        } else {
          LOG.debug("Deleted recovered.edits file=" + file);
        }
      }
    }
-    return seqid;
+
+    // We have replayed all the recovered edits. Let's delete the wrong directories introduced
+    // in HBASE-20734, see HBASE-22617 for more details.
+    FileSystem walFs = getWalFileSystem();
+    if (walFs.exists(wrongRegionWALDir)) {
+      if (!walFs.delete(wrongRegionWALDir, true)) {
+        LOG.warn("Unable to delete " + wrongRegionWALDir);
+      }
+    }
+
+    return seqId;
  }

  private long replayRecoveredEditsForPaths(long minSeqIdForTheRegion, FileSystem fs,
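Note: this hunk is the heart of the fix. Recovered edits are now collected from three places — the misplaced WAL-dir location introduced by HBASE-20734 (wrongRegionWALDir), the root-dir location kept for HBASE-20723 compatibility, and the proper WAL-dir location — with the maximum sequence id accumulated across all three replays; all files are then deleted (or archived as fake store files), and finally the wrong directory itself is removed recursively. A simplified standalone model of that flow, using java.nio.file in place of Hadoop's FileSystem and stubbing out the actual edit replay (not HBase code; names mirror the patch):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.NavigableSet;
import java.util.TreeSet;
import java.util.stream.Stream;

public class RecoveredEditsModel {

  // Stand-in for WALSplitter.getSplitEditFilesSorted: recovered-edit files, sorted.
  static NavigableSet<Path> splitEditFiles(Path regionDir) throws IOException {
    NavigableSet<Path> files = new TreeSet<>();
    Path editsDir = regionDir.resolve("recovered.edits");
    if (Files.isDirectory(editsDir)) {
      try (Stream<Path> s = Files.list(editsDir)) {
        s.forEach(files::add);
      }
    }
    return files;
  }

  // Stand-in for replayRecoveredEditsForPaths: returns the max seqId applied.
  static long replay(long minSeqId, NavigableSet<Path> files) {
    return minSeqId; // the real method applies edits and returns their max sequence id
  }

  static long replayAndCleanUp(long minSeqId, Path regionDir, Path regionWALDir,
      Path wrongRegionWALDir) throws IOException {
    long seqId = minSeqId;
    // 1. The misplaced directory introduced by HBASE-20734.
    NavigableSet<Path> wrongFiles = splitEditFiles(wrongRegionWALDir);
    seqId = Math.max(seqId, replay(minSeqId, wrongFiles));
    // 2. The root-dir location kept for HBASE-20723 compatibility.
    NavigableSet<Path> rootFiles = regionWALDir.equals(regionDir)
        ? new TreeSet<>() : splitEditFiles(regionDir);
    seqId = Math.max(seqId, replay(minSeqId, rootFiles));
    // 3. The proper WAL-dir location.
    NavigableSet<Path> walFiles = splitEditFiles(regionWALDir);
    seqId = Math.max(seqId, replay(minSeqId, walFiles));

    // Delete every replayed file...
    List<Path> all = new ArrayList<>();
    all.addAll(wrongFiles);
    all.addAll(rootFiles);
    all.addAll(walFiles);
    for (Path f : all) {
      Files.deleteIfExists(f);
    }
    // ...then remove the wrong directory recursively: the actual HBASE-22617 fix.
    if (Files.isDirectory(wrongRegionWALDir)) {
      try (Stream<Path> s = Files.walk(wrongRegionWALDir)) {
        for (Path p : s.sorted(Comparator.reverseOrder()).toArray(Path[]::new)) {
          Files.delete(p);
        }
      }
    }
    return seqId;
  }
}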
@@ -7023,34 +7039,6 @@ public static void addRegionToMETA(final HRegion meta, final HRegion r) throws I
     meta.put(row, HConstants.CATALOG_FAMILY, cells);
   }

-  /**
-   * Computes the Path of the HRegion
-   *
-   * @param tabledir qualified path for table
-   * @param name ENCODED region name
-   * @return Path of HRegion directory
-   * @deprecated For tests only; to be removed.
-   */
-  @Deprecated
-  public static Path getRegionDir(final Path tabledir, final String name) {
-    return new Path(tabledir, name);
-  }
-
-  /**
-   * Computes the Path of the HRegion
-   *
-   * @param rootdir qualified path of HBase root directory
-   * @param info HRegionInfo for the region
-   * @return qualified path of region directory
-   * @deprecated For tests only; to be removed.
-   */
-  @Deprecated
-  @VisibleForTesting
-  public static Path getRegionDir(final Path rootdir, final HRegionInfo info) {
-    return new Path(
-      FSUtils.getTableDir(rootdir, info.getTable()), info.getEncodedName());
-  }
-
   /**
    * Determines if the specified row is within the row range specified by the
    * specified HRegionInfo

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java

Lines changed: 10 additions & 3 deletions

@@ -557,19 +557,26 @@ Path commitDaughterRegion(final HRegionInfo regionInfo)
   /**
    * Create the region splits directory.
    */
-  void createSplitsDir() throws IOException {
+  void createSplitsDir(HRegionInfo daughterA, HRegionInfo daughterB) throws IOException {
     Path splitdir = getSplitsDir();
     if (fs.exists(splitdir)) {
       LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
       if (!deleteDir(splitdir)) {
-        throw new IOException("Failed deletion of " + splitdir
-          + " before creating them again.");
+        throw new IOException("Failed deletion of " + splitdir + " before creating them again.");
       }
     }
     // splitDir doesn't exists now. No need to do an exists() call for it.
     if (!createDir(splitdir)) {
       throw new IOException("Failed create of " + splitdir);
     }
+    Path daughterATmpDir = getSplitsDir(daughterA);
+    if (!createDir(daughterATmpDir)) {
+      throw new IOException("Failed create of " + daughterATmpDir);
+    }
+    Path daughterBTmpDir = getSplitsDir(daughterB);
+    if (!createDir(daughterBTmpDir)) {
+      throw new IOException("Failed create of " + daughterBTmpDir);
+    }
   }

   /**
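Note: createSplitsDir now takes the two daughter regions and pre-creates their temp directories up front, instead of leaving them to be created lazily mid-split; the call site is updated in SplitTransactionImpl.java below. A standalone model of the new contract, assuming the ".splits" directory naming used by branch-1 (not HBase code):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.stream.Stream;

public class SplitsDirModel {
  static void createSplitsDir(Path regionDir, String daughterAEncodedName,
      String daughterBEncodedName) throws IOException {
    Path splitDir = regionDir.resolve(".splits");
    if (Files.exists(splitDir)) {
      // Stale directory from a previously failed split: remove it wholesale.
      try (Stream<Path> s = Files.walk(splitDir)) {
        for (Path p : s.sorted(Comparator.reverseOrder()).toArray(Path[]::new)) {
          Files.delete(p);
        }
      }
    }
    Files.createDirectory(splitDir);
    // New in this patch: the per-daughter temp dirs are created eagerly.
    Files.createDirectory(splitDir.resolve(daughterAEncodedName));
    Files.createDirectory(splitDir.resolve(daughterBEncodedName));
  }
}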

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransactionImpl.java

Lines changed: 1 addition & 1 deletion

@@ -383,7 +383,7 @@ public PairOfSameType<Region> stepsBeforePONR(final Server server,
         hri_b, std);
     }

-    this.parent.getRegionFileSystem().createSplitsDir();
+    this.parent.getRegionFileSystem().createSplitsDir(hri_a, hri_b);

     transition(SplitTransactionPhase.CREATE_SPLIT_DIR);
hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java

Lines changed: 3 additions & 2 deletions

@@ -642,8 +642,9 @@ private void restoreReferenceFile(final Path familyDir, final HRegionInfo region
     if (linkPath != null) {
       in = HFileLink.buildFromHFileLinkPattern(conf, linkPath).open(fs);
     } else {
-      linkPath = new Path(new Path(HRegion.getRegionDir(snapshotManifest.getSnapshotDir(),
-        regionInfo.getEncodedName()), familyDir.getName()), hfileName);
+      linkPath = new Path(new Path(new Path(snapshotManifest.getSnapshotDir(),
+        regionInfo.getEncodedName()),
+        familyDir.getName()), hfileName);
       in = fs.open(linkPath);
     }
     OutputStream out = fs.create(outPath);
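Note: the replacement inlines the path composition that the removed HRegion.getRegionDir(dir, encodedName) performed, yielding <snapshotDir>/<encodedRegionName>/<family>/<hfileName>. A minimal sketch of that composition, with the method name being a hypothetical label for illustration only:

import org.apache.hadoop.fs.Path;

public class SnapshotRefPathSketch {
  // Nested Path constructors build the reference-file path segment by segment.
  static Path referenceFilePath(Path snapshotDir, String encodedRegionName,
      String family, String hfileName) {
    return new Path(new Path(new Path(snapshotDir, encodedRegionName), family), hfileName);
  }
}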
