Skip to content

Commit ced4487

Browse files
committed
HBASE-22627 Port HBASE-22617 (Recovered WAL directories not getting cleaned up) to branch-1
HBASE-22617 Recovered WAL directories not getting cleaned up (Duo Zhang)
1 parent 5023690 commit ced4487

File tree

18 files changed

+157
-113
lines changed

18 files changed

+157
-113
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,8 @@
3232
import org.apache.hadoop.fs.FileSystem;
3333
import org.apache.hadoop.fs.Path;
3434
import org.apache.hadoop.fs.PathFilter;
35-
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
3635
import org.apache.hadoop.hbase.HRegionInfo;
3736
import org.apache.hadoop.hbase.classification.InterfaceAudience;
38-
import org.apache.hadoop.hbase.regionserver.HRegion;
3937
import org.apache.hadoop.hbase.regionserver.StoreFile;
4038
import org.apache.hadoop.hbase.util.Bytes;
4139
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -85,7 +83,7 @@ public static void archiveRegion(Configuration conf, FileSystem fs, HRegionInfo
8583
throws IOException {
8684
Path rootDir = FSUtils.getRootDir(conf);
8785
archiveRegion(fs, rootDir, FSUtils.getTableDir(rootDir, info.getTable()),
88-
HRegion.getRegionDir(rootDir, info));
86+
FSUtils.getRegionDirFromRootDir(rootDir, info));
8987
}
9088

9189
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import org.apache.hadoop.hbase.TableName;
3232
import org.apache.hadoop.hbase.HConstants;
3333
import org.apache.hadoop.hbase.HRegionInfo;
34-
import org.apache.hadoop.hbase.regionserver.HRegion;
3534
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
3635
import org.apache.hadoop.hbase.util.FSUtils;
3736
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
@@ -486,7 +485,7 @@ public static Path getHFileFromBackReference(final Path rootDir, final Path link
486485
String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
487486
regionPath.getName(), hfileName);
488487
Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
489-
Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
488+
Path regionDir = new Path(linkTableDir, linkRegionName);
490489
return new Path(new Path(regionDir, familyPath.getName()), linkName);
491490
}
492491

hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ boolean cleanMergeRegion(final HRegionInfo mergedRegion,
219219
+ " from fs because merged region no longer holds references");
220220
HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionA);
221221
HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionB);
222+
cleanUpWalDirs(this.services.getMasterFileSystem(), regionA);
223+
cleanUpWalDirs(this.services.getMasterFileSystem(), regionB);
222224
MetaTableAccessor.deleteMergeQualifiers(services.getConnection(), mergedRegion);
223225
services.getAssignmentManager().getRegionStates().deleteRegion(regionA);
224226
services.getAssignmentManager().getRegionStates().deleteRegion(regionB);
@@ -365,6 +367,7 @@ boolean cleanParent(final HRegionInfo parent, Result rowContent)
365367
FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
366368
if (LOG.isTraceEnabled()) LOG.trace("Archiving parent region: " + parent);
367369
HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, parent);
370+
cleanUpWalDirs(this.services.getMasterFileSystem(), parent);
368371
MetaTableAccessor.deleteRegion(this.connection, parent);
369372
if (services.getAssignmentManager().getRegionStates() != null)
370373
services.getAssignmentManager().getRegionStates().deleteRegion(parent);
@@ -470,4 +473,25 @@ public boolean cleanMergeQualifier(final HRegionInfo region)
470473
return cleanMergeRegion(region, mergeRegions.getFirst(),
471474
mergeRegions.getSecond());
472475
}
476+
477+
/**
 * Removes the directories kept for {@code region} on the WAL filesystem.
 * <p>
 * Two locations are checked on the filesystem returned by
 * {@code mfs.getWALFileSystem()}: the path computed by
 * {@code FSUtils.getWALRegionDir}, and the path computed by
 * {@code FSUtils.getWrongWALRegionDir} (misplaced files from HBASE-20734).
 * Each one that exists is deleted with the recursive flag set. A delete that
 * returns {@code false} is only logged at debug level; it is not turned into
 * an exception.
 *
 * @param mfs supplies the WAL filesystem and the configuration used to
 *          compute both paths
 * @param region the region whose WAL directories are removed
 * @throws IOException declared by the method; presumably propagated from the
 *           filesystem {@code exists}/{@code delete} calls
 */
private static void cleanUpWalDirs(MasterFileSystem mfs, HRegionInfo region)
478+
throws IOException {
479+
// Clean up WAL dirs
480+
FileSystem walFs = mfs.getWALFileSystem();
481+
// Clean up the directories on WAL filesystem also
482+
Path regionWALDir = FSUtils.getWALRegionDir(mfs.getConfiguration(), region);
483+
if (walFs.exists(regionWALDir)) {
484+
if (!walFs.delete(regionWALDir, true)) {
485+
LOG.debug("Unable to delete " + regionWALDir);
486+
}
487+
}
488+
// Clean up any misplaced files from HBASE-20734
489+
Path wrongRegionWALDir = FSUtils.getWrongWALRegionDir(mfs.getConfiguration(),
490+
region.getTable(), region.getEncodedName());
491+
if (walFs.exists(wrongRegionWALDir)) {
492+
if (!walFs.delete(wrongRegionWALDir, true)) {
493+
LOG.debug("Unable to delete " + wrongRegionWALDir);
494+
}
495+
}
496+
}
473497
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ public boolean checkFileSystem() {
214214
return this.walFsOk;
215215
}
216216

217-
protected FileSystem getWALFileSystem() {
217+
public FileSystem getWALFileSystem() {
218218
return this.walFs;
219219
}
220220

@@ -691,6 +691,4 @@ public void archiveMetaLog(final ServerName serverName) {
691691
LOG.warn("Failed archiving meta log for server " + serverName, ie);
692692
}
693693
}
694-
695-
696694
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
import org.apache.hadoop.hbase.client.Scan;
4444
import org.apache.hadoop.hbase.client.Table;
4545
import org.apache.hadoop.hbase.exceptions.HBaseException;
46-
import org.apache.hadoop.hbase.regionserver.HRegion;
4746
import org.apache.hadoop.hbase.master.AssignmentManager;
4847
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
4948
import org.apache.hadoop.hbase.master.MasterFileSystem;
@@ -323,7 +322,7 @@ protected static void deleteFromFs(final MasterProcedureEnv env,
323322
for (HRegionInfo hri : regions) {
324323
LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS");
325324
HFileArchiver.archiveRegion(fs, mfs.getRootDir(),
326-
tempTableDir, HRegion.getRegionDir(tempTableDir, hri.getEncodedName()));
325+
tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
327326
}
328327
LOG.debug("Table '" + tableName + "' archived!");
329328
}
@@ -332,6 +331,13 @@ protected static void deleteFromFs(final MasterProcedureEnv env,
332331
if (!fs.delete(tempTableDir, true) && fs.exists(tempTableDir)) {
333332
throw new IOException("Couldn't delete " + tempTableDir);
334333
}
334+
335+
// Delete the directory on wal filesystem
336+
FileSystem walFs = mfs.getWALFileSystem();
337+
Path tableWALDir = FSUtils.getWALTableDir(env.getMasterConfiguration(), tableName);
338+
if (walFs.exists(tableWALDir) && !walFs.delete(tableWALDir, true)) {
339+
throw new IOException("Couldn't delete table dir on wal filesystem " + tableWALDir);
340+
}
335341
}
336342

337343
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/migration/NamespaceUpgrade.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,7 @@ public void migrateMeta() throws IOException {
319319
}
320320

321321
// Since meta table name has changed rename meta region dir from its old encoding to new one
322-
Path oldMetaRegionDir = HRegion.getRegionDir(rootDir,
323-
new Path(newMetaDir, "1028785192").toString());
322+
Path oldMetaRegionDir = new Path(rootDir, new Path(newMetaDir, "1028785192").toString());
324323
if (fs.exists(oldMetaRegionDir)) {
325324
LOG.info("Migrating meta region " + oldMetaRegionDir + " to " + newMetaRegionDir);
326325
if (!fs.rename(oldMetaRegionDir, newMetaRegionDir)) {

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java

Lines changed: 50 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
import com.google.common.annotations.VisibleForTesting;
2222
import com.google.common.base.Optional;
2323
import com.google.common.base.Preconditions;
24+
import com.google.common.collect.Iterables;
2425
import com.google.common.collect.Lists;
2526
import com.google.common.collect.Maps;
27+
import com.google.common.collect.Sets;
2628
import com.google.common.io.Closeables;
2729
import com.google.protobuf.ByteString;
2830
import com.google.protobuf.Descriptors;
@@ -4173,7 +4175,7 @@ private void removeNonExistentColumnFamilyForReplay(
41734175
if (nonExistentList != null) {
41744176
for (byte[] family : nonExistentList) {
41754177
// Perhaps schema was changed between crash and replay
4176-
LOG.info("No family for " + Bytes.toString(family) + " omit from reply.");
4178+
LOG.info("No family for " + Bytes.toString(family) + " omit from replay.");
41774179
familyMap.remove(family);
41784180
}
41794181
}
@@ -4286,62 +4288,76 @@ protected long replayRecoveredEditsIfAny(Map<byte[], Long> maxSeqIdInStores,
42864288
minSeqIdForTheRegion = maxSeqIdInStore;
42874289
}
42884290
}
4289-
long seqid = minSeqIdForTheRegion;
4291+
long seqId = minSeqIdForTheRegion;
42904292

42914293
FileSystem walFS = getWalFileSystem();
4292-
Path regionDir = getWALRegionDir();
42934294
FileSystem rootFS = getFilesystem();
4294-
Path defaultRegionDir = getRegionDir(FSUtils.getRootDir(conf), getRegionInfo());
4295+
Path regionDir = FSUtils.getRegionDirFromRootDir(FSUtils.getRootDir(conf), getRegionInfo());
4296+
Path regionWALDir = getWALRegionDir();
4297+
Path wrongRegionWALDir = FSUtils.getWrongWALRegionDir(conf, getRegionInfo().getTable(),
4298+
getRegionInfo().getEncodedName());
42954299

4300+
// We made a mistake in HBASE-20734 so we need to do this dirty hack...
4301+
NavigableSet<Path> filesUnderWrongRegionWALDir =
4302+
WALSplitter.getSplitEditFilesSorted(walFS, wrongRegionWALDir);
4303+
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
4304+
filesUnderWrongRegionWALDir, reporter, regionDir));
42964305
// This is to ensure backwards compatibility with HBASE-20723 where recovered edits can appear
42974306
// under the root dir even if walDir is set.
4298-
NavigableSet<Path> filesUnderRootDir = null;
4299-
if (!regionDir.equals(defaultRegionDir)) {
4300-
filesUnderRootDir =
4301-
WALSplitter.getSplitEditFilesSorted(rootFS, defaultRegionDir);
4302-
seqid = Math.max(seqid,
4303-
replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS, filesUnderRootDir, reporter,
4304-
defaultRegionDir));
4305-
}
4306-
NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(walFS, regionDir);
4307-
seqid = Math.max(seqid, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
4308-
files, reporter, regionDir));
4309-
4310-
if (seqid > minSeqIdForTheRegion) {
4307+
NavigableSet<Path> filesUnderRootDir = Sets.newTreeSet();
4308+
if (!regionWALDir.equals(regionDir)) {
4309+
filesUnderRootDir = WALSplitter.getSplitEditFilesSorted(rootFS, regionDir);
4310+
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS,
4311+
filesUnderRootDir, reporter, regionDir));
4312+
}
4313+
NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(walFS, regionWALDir);
4314+
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
4315+
files, reporter, regionWALDir));
4316+
if (seqId > minSeqIdForTheRegion) {
43114317
// Then we added some edits to memory. Flush and cleanup split edit files.
4312-
internalFlushcache(null, seqid, stores.values(), status, false);
4318+
internalFlushcache(null, seqId, stores.values(), status, false);
43134319
}
4314-
// Now delete the content of recovered edits. We're done w/ them.
4315-
if (files.size() > 0 && this.conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
4320+
// Now delete the content of recovered edits. We're done w/ them.
4321+
if (conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
43164322
// For debugging data loss issues!
43174323
// If this flag is set, make use of the hfile archiving by making recovered.edits a fake
43184324
// column family. Have to fake out file type too by casting our recovered.edits as storefiles
4319-
String fakeFamilyName = WALSplitter.getRegionDirRecoveredEditsDir(regionDir).getName();
4320-
Set<StoreFile> fakeStoreFiles = new HashSet<>(files.size());
4321-
for (Path file: files) {
4322-
fakeStoreFiles.add(
4323-
new StoreFile(walFS, file, this.conf, null, null));
4325+
String fakeFamilyName = WALSplitter.getRegionDirRecoveredEditsDir(regionWALDir).getName();
4326+
Set<StoreFile> fakeStoreFiles = new HashSet<>();
4327+
for (Path file: Iterables.concat(files, filesUnderWrongRegionWALDir)) {
4328+
fakeStoreFiles.add(new StoreFile(walFS, file, conf, null, null));
4329+
}
4330+
for (Path file: filesUnderRootDir) {
4331+
fakeStoreFiles.add(new StoreFile(rootFS, file, conf, null, null));
43244332
}
43254333
getRegionWALFileSystem().removeStoreFiles(fakeFamilyName, fakeStoreFiles);
43264334
} else {
4327-
if (filesUnderRootDir != null) {
4328-
for (Path file : filesUnderRootDir) {
4329-
if (!rootFS.delete(file, false)) {
4330-
LOG.error("Failed delete of {} under root directory." + file);
4331-
} else {
4332-
LOG.debug("Deleted recovered.edits root directory file=" + file);
4333-
}
4335+
for (Path file : filesUnderRootDir) {
4336+
if (!rootFS.delete(file, false)) {
4337+
LOG.error("Failed delete of " + file + " from under the root directory");
4338+
} else {
4339+
LOG.debug("Deleted recovered.edits under root directory, file=" + file);
43344340
}
43354341
}
4336-
for (Path file: files) {
4342+
for (Path file : Iterables.concat(files, filesUnderWrongRegionWALDir)) {
43374343
if (!walFS.delete(file, false)) {
43384344
LOG.error("Failed delete of " + file);
43394345
} else {
43404346
LOG.debug("Deleted recovered.edits file=" + file);
43414347
}
43424348
}
43434349
}
4344-
return seqid;
4350+
4351+
// We have replayed all the recovered edits. Let's delete the wrong directories introduced
4352+
// in HBASE-20734, see HBASE-22617 for more details.
4353+
FileSystem walFs = getWalFileSystem();
4354+
if (walFs.exists(wrongRegionWALDir)) {
4355+
if (!walFs.delete(wrongRegionWALDir, true)) {
4356+
LOG.warn("Unable to delete " + wrongRegionWALDir);
4357+
}
4358+
}
4359+
4360+
return seqId;
43454361
}
43464362

43474363
private long replayRecoveredEditsForPaths(long minSeqIdForTheRegion, FileSystem fs,
@@ -7206,34 +7222,6 @@ public static void addRegionToMETA(final HRegion meta, final HRegion r) throws I
72067222
meta.put(row, HConstants.CATALOG_FAMILY, cells);
72077223
}
72087224

7209-
/**
7210-
* Computes the Path of the HRegion
7211-
*
7212-
* @param tabledir qualified path for table
7213-
* @param name ENCODED region name
7214-
* @return Path of HRegion directory
7215-
* @deprecated For tests only; to be removed.
7216-
*/
7217-
@Deprecated
7218-
public static Path getRegionDir(final Path tabledir, final String name) {
7219-
return new Path(tabledir, name);
7220-
}
7221-
7222-
/**
7223-
* Computes the Path of the HRegion
7224-
*
7225-
* @param rootdir qualified path of HBase root directory
7226-
* @param info HRegionInfo for the region
7227-
* @return qualified path of region directory
7228-
* @deprecated For tests only; to be removed.
7229-
*/
7230-
@Deprecated
7231-
@VisibleForTesting
7232-
public static Path getRegionDir(final Path rootdir, final HRegionInfo info) {
7233-
return new Path(
7234-
FSUtils.getTableDir(rootdir, info.getTable()), info.getEncodedName());
7235-
}
7236-
72377225
/**
72387226
* Determines if the specified row is within the row range specified by the
72397227
* specified HRegionInfo

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -640,19 +640,26 @@ Path commitDaughterRegion(final HRegionInfo regionInfo)
640640
/**
641641
* Create the region splits directory.
642642
*/
643-
void createSplitsDir() throws IOException {
643+
void createSplitsDir(HRegionInfo daughterA, HRegionInfo daughterB) throws IOException {
644644
Path splitdir = getSplitsDir();
645645
if (fs.exists(splitdir)) {
646646
LOG.info("The " + splitdir + " directory exists. Hence deleting it to recreate it");
647647
if (!deleteDir(splitdir)) {
648-
throw new IOException("Failed deletion of " + splitdir
649-
+ " before creating them again.");
648+
throw new IOException("Failed deletion of " + splitdir + " before creating them again.");
650649
}
651650
}
652651
// splitDir doesn't exist now. No need to do an exists() call for it.
653652
if (!createDir(splitdir)) {
654653
throw new IOException("Failed create of " + splitdir);
655654
}
655+
Path daughterATmpDir = getSplitsDir(daughterA);
656+
if (!createDir(daughterATmpDir)) {
657+
throw new IOException("Failed create of " + daughterATmpDir);
658+
}
659+
Path daughterBTmpDir = getSplitsDir(daughterB);
660+
if (!createDir(daughterBTmpDir)) {
661+
throw new IOException("Failed create of " + daughterBTmpDir);
662+
}
656663
}
657664

658665
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransactionImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ public PairOfSameType<Region> stepsBeforePONR(final Server server,
351351
hri_b, std);
352352
}
353353

354-
this.parent.getRegionFileSystem().createSplitsDir();
354+
this.parent.getRegionFileSystem().createSplitsDir(hri_a, hri_b);
355355

356356
transition(SplitTransactionPhase.CREATE_SPLIT_DIR);
357357

hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -648,8 +648,9 @@ private void restoreReferenceFile(final Path familyDir, final HRegionInfo region
648648
if (linkPath != null) {
649649
in = HFileLink.buildFromHFileLinkPattern(conf, linkPath).open(fs);
650650
} else {
651-
linkPath = new Path(new Path(HRegion.getRegionDir(snapshotManifest.getSnapshotDir(),
652-
regionInfo.getEncodedName()), familyDir.getName()), hfileName);
651+
linkPath = new Path(new Path(new Path(snapshotManifest.getSnapshotDir(),
652+
regionInfo.getEncodedName()),
653+
familyDir.getName()), hfileName);
653654
in = fs.open(linkPath);
654655
}
655656
OutputStream out = fs.create(outPath);

0 commit comments

Comments
 (0)