HBASE-22627 Port HBASE-22617 (Recovered WAL directories not getting cleaned up) to branch-1 #339

Merged: 1 commit, Jun 26, 2019
@@ -32,10 +32,8 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -85,7 +83,7 @@ public static void archiveRegion(Configuration conf, FileSystem fs, HRegionInfo
throws IOException {
Path rootDir = FSUtils.getRootDir(conf);
archiveRegion(fs, rootDir, FSUtils.getTableDir(rootDir, info.getTable()),
HRegion.getRegionDir(rootDir, info));
FSUtils.getRegionDirFromRootDir(rootDir, info));
}

/**
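A minimal usage sketch of the FSUtils.getRegionDirFromRootDir helper that the call site above now uses (the helper itself is added near the end of this diff); the class name, table, and configuration values here are illustrative, not part of the patch:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.FSUtils;

public class RegionDirSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HRegionInfo hri = new HRegionInfo(TableName.valueOf("t1")); // illustrative region
    Path rootDir = FSUtils.getRootDir(conf);
    // Resolves <rootDir>/data/<namespace>/<table>/<encoded region name> without
    // going through the deprecated HRegion.getRegionDir helpers removed later in this diff.
    Path regionDir = FSUtils.getRegionDirFromRootDir(rootDir, hri);
    System.out.println(regionDir);
  }
}
```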
@@ -31,7 +31,6 @@
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
@@ -486,7 +485,7 @@ public static Path getHFileFromBackReference(final Path rootDir, final Path link
String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
regionPath.getName(), hfileName);
Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
Path regionDir = new Path(linkTableDir, linkRegionName);
return new Path(new Path(regionDir, familyPath.getName()), linkName);
}

@@ -214,7 +214,7 @@ public boolean checkFileSystem() {
return this.walFsOk;
}

protected FileSystem getWALFileSystem() {
public FileSystem getWALFileSystem() {
return this.walFs;
}

@@ -691,6 +691,4 @@ public void archiveMetaLog(final ServerName serverName) {
LOG.warn("Failed archiving meta log for server " + serverName, ie);
}
}


}
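The visibility change above makes getWALFileSystem() callable from outside its class. Since hbase.rootdir and the WAL root dir can point at different filesystems, the replay code later in this diff resolves a rootFS and a walFS separately; a small sketch of doing the same (class name illustrative, assuming the HBase server jars on the classpath):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.FSUtils;

public class WalVsRootFsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Path rootDir = FSUtils.getRootDir(conf);        // hbase.rootdir
    Path walRootDir = FSUtils.getWALRootDir(conf);  // may differ from the root dir
    FileSystem rootFs = rootDir.getFileSystem(conf);
    FileSystem walFs = walRootDir.getFileSystem(conf);
    System.out.println("root=" + rootDir + " on " + rootFs.getUri());
    System.out.println("walRoot=" + walRootDir + " on " + walFs.getUri());
  }
}
```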
@@ -43,7 +43,6 @@
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.exceptions.HBaseException;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
@@ -323,7 +322,7 @@ protected static void deleteFromFs(final MasterProcedureEnv env,
for (HRegionInfo hri : regions) {
LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS");
HFileArchiver.archiveRegion(fs, mfs.getRootDir(),
tempTableDir, HRegion.getRegionDir(tempTableDir, hri.getEncodedName()));
tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
}
LOG.debug("Table '" + tableName + "' archived!");
}
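For reference, a small sketch (class name, table, and encoded region name are illustrative) of the four-argument HFileArchiver.archiveRegion form that the changed call sites in this diff feed with an explicitly composed region directory:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.util.FSUtils;

public class ArchiveRegionSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Path rootDir = FSUtils.getRootDir(conf);
    FileSystem fs = rootDir.getFileSystem(conf);
    Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf("t1"));
    Path regionDir = new Path(tableDir, "5dd3d2a251fca2ace53599cbeee06e36"); // encoded name, illustrative
    // Moves the region's store files into the archive instead of deleting them outright.
    HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
  }
}
```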
@@ -319,8 +319,7 @@ public void migrateMeta() throws IOException {
}

// Since meta table name has changed rename meta region dir from it's old encoding to new one
Path oldMetaRegionDir = HRegion.getRegionDir(rootDir,
new Path(newMetaDir, "1028785192").toString());
Path oldMetaRegionDir = new Path(rootDir, new Path(newMetaDir, "1028785192").toString());
if (fs.exists(oldMetaRegionDir)) {
LOG.info("Migrating meta region " + oldMetaRegionDir + " to " + newMetaRegionDir);
if (!fs.rename(oldMetaRegionDir, newMetaRegionDir)) {
@@ -21,8 +21,10 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import com.google.protobuf.ByteString;
import com.google.protobuf.Descriptors;
@@ -4173,7 +4175,7 @@ private void removeNonExistentColumnFamilyForReplay(
if (nonExistentList != null) {
for (byte[] family : nonExistentList) {
// Perhaps schema was changed between crash and replay
LOG.info("No family for " + Bytes.toString(family) + " omit from reply.");
LOG.info("No family for " + Bytes.toString(family) + " omit from replay.");
familyMap.remove(family);
}
}
@@ -4286,62 +4288,76 @@ protected long replayRecoveredEditsIfAny(Map<byte[], Long> maxSeqIdInStores,
minSeqIdForTheRegion = maxSeqIdInStore;
}
}
long seqid = minSeqIdForTheRegion;
long seqId = minSeqIdForTheRegion;

FileSystem walFS = getWalFileSystem();
Path regionDir = getWALRegionDir();
FileSystem rootFS = getFilesystem();
Path defaultRegionDir = getRegionDir(FSUtils.getRootDir(conf), getRegionInfo());
Path regionDir = FSUtils.getRegionDirFromRootDir(FSUtils.getRootDir(conf), getRegionInfo());
Path regionWALDir = getWALRegionDir();
Path wrongRegionWALDir = FSUtils.getWrongWALRegionDir(conf, getRegionInfo().getTable(),
getRegionInfo().getEncodedName());

// We made a mistake in HBASE-20734 so we need to do this dirty hack...
NavigableSet<Path> filesUnderWrongRegionWALDir =
WALSplitter.getSplitEditFilesSorted(walFS, wrongRegionWALDir);
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
filesUnderWrongRegionWALDir, reporter, regionDir));
// This is to ensure backwards compatability with HBASE-20723 where recovered edits can appear
// under the root dir even if walDir is set.
NavigableSet<Path> filesUnderRootDir = null;
if (!regionDir.equals(defaultRegionDir)) {
filesUnderRootDir =
WALSplitter.getSplitEditFilesSorted(rootFS, defaultRegionDir);
seqid = Math.max(seqid,
replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS, filesUnderRootDir, reporter,
defaultRegionDir));
}
NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(walFS, regionDir);
seqid = Math.max(seqid, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
files, reporter, regionDir));

if (seqid > minSeqIdForTheRegion) {
NavigableSet<Path> filesUnderRootDir = Sets.newTreeSet();
if (!regionWALDir.equals(regionDir)) {
filesUnderRootDir = WALSplitter.getSplitEditFilesSorted(rootFS, regionDir);
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS,
filesUnderRootDir, reporter, regionDir));
}
NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(walFS, regionWALDir);
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
files, reporter, regionWALDir));
if (seqId > minSeqIdForTheRegion) {
// Then we added some edits to memory. Flush and cleanup split edit files.
internalFlushcache(null, seqid, stores.values(), status, false);
internalFlushcache(null, seqId, stores.values(), status, false);
}
// Now delete the content of recovered edits. We're done w/ them.
if (files.size() > 0 && this.conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
// Now delete the content of recovered edits. We're done w/ them.
if (conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
// For debugging data loss issues!
// If this flag is set, make use of the hfile archiving by making recovered.edits a fake
// column family. Have to fake out file type too by casting our recovered.edits as storefiles
String fakeFamilyName = WALSplitter.getRegionDirRecoveredEditsDir(regionDir).getName();
Set<StoreFile> fakeStoreFiles = new HashSet<>(files.size());
for (Path file: files) {
fakeStoreFiles.add(
new StoreFile(walFS, file, this.conf, null, null));
String fakeFamilyName = WALSplitter.getRegionDirRecoveredEditsDir(regionWALDir).getName();
Set<StoreFile> fakeStoreFiles = new HashSet<>();
for (Path file: Iterables.concat(files, filesUnderWrongRegionWALDir)) {
fakeStoreFiles.add(new StoreFile(walFS, file, conf, null, null));
}
for (Path file: filesUnderRootDir) {
fakeStoreFiles.add(new StoreFile(rootFS, file, conf, null, null));
}
getRegionWALFileSystem().removeStoreFiles(fakeFamilyName, fakeStoreFiles);
} else {
if (filesUnderRootDir != null) {
for (Path file : filesUnderRootDir) {
if (!rootFS.delete(file, false)) {
LOG.error("Failed delete of {} under root directory." + file);
} else {
LOG.debug("Deleted recovered.edits root directory file=" + file);
}
for (Path file : filesUnderRootDir) {
if (!rootFS.delete(file, false)) {
LOG.error("Failed delete of " + file + " from under the root directory");
} else {
LOG.debug("Deleted recovered.edits under root directory, file=" + file);
}
}
for (Path file: files) {
for (Path file : Iterables.concat(files, filesUnderWrongRegionWALDir)) {
if (!walFS.delete(file, false)) {
LOG.error("Failed delete of " + file);
} else {
LOG.debug("Deleted recovered.edits file=" + file);
}
}
}
return seqid;

// We have replayed all the recovered edits. Let's delete the wrong directories introduced
// in HBASE-20734, see HBASE-22617 for more details.
FileSystem walFs = getWalFileSystem();
if (walFs.exists(wrongRegionWALDir)) {
if (!walFs.delete(wrongRegionWALDir, true)) {
LOG.warn("Unable to delete " + wrongRegionWALDir);
}
}

return seqId;
}

private long replayRecoveredEditsForPaths(long minSeqIdForTheRegion, FileSystem fs,
@@ -7206,34 +7222,6 @@ public static void addRegionToMETA(final HRegion meta, final HRegion r) throws I
meta.put(row, HConstants.CATALOG_FAMILY, cells);
}

/**
* Computes the Path of the HRegion
*
* @param tabledir qualified path for table
* @param name ENCODED region name
* @return Path of HRegion directory
* @deprecated For tests only; to be removed.
*/
@Deprecated
public static Path getRegionDir(final Path tabledir, final String name) {
return new Path(tabledir, name);
}

/**
* Computes the Path of the HRegion
*
* @param rootdir qualified path of HBase root directory
* @param info HRegionInfo for the region
* @return qualified path of region directory
* @deprecated For tests only; to be removed.
*/
@Deprecated
@VisibleForTesting
public static Path getRegionDir(final Path rootdir, final HRegionInfo info) {
return new Path(
FSUtils.getTableDir(rootdir, info.getTable()), info.getEncodedName());
}

/**
* Determines if the specified row is within the row range specified by the
* specified HRegionInfo
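To make the replayRecoveredEditsIfAny changes above easier to follow, here is a sketch (class name and paths are illustrative) of the three locations recovered edits may now live: under the root dir (HBASE-20723 compatibility), under the correct WAL region dir, and under the HBASE-20734 'wrong' WAL region dir that is deleted once its edits have been replayed:

```java
import org.apache.hadoop.fs.Path;

public class RecoveredEditsLocationsSketch {
  public static void main(String[] args) {
    Path rootDir = new Path("hdfs://nn/hbase");
    Path walRootDir = new Path("hdfs://nn/hbase-wal");
    String ns = "default", table = "t1", region = "5dd3d2a251fca2ace53599cbeee06e36";

    // 1. Region dir under the root dir: edits can appear here even when a separate
    //    WAL dir is configured (the HBASE-20723 compatibility case).
    Path underRoot = new Path(new Path(new Path(new Path(rootDir, "data"), ns), table), region);
    // 2. Correct WAL region dir: includes the "data" base namespace component,
    //    matching the getWALTableDir fix in this patch.
    Path correctWal = new Path(new Path(new Path(new Path(walRootDir, "data"), ns), table), region);
    // 3. "Wrong" HBASE-20734 dir: same layout but missing the "data" component;
    //    removed after its edits have been replayed.
    Path wrongWal = new Path(new Path(new Path(walRootDir, ns), table), region);

    System.out.println(new Path(underRoot, "recovered.edits"));
    System.out.println(new Path(correctWal, "recovered.edits"));
    System.out.println(new Path(wrongWal, "recovered.edits"));
  }
}
```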
@@ -640,19 +640,26 @@ Path commitDaughterRegion(final HRegionInfo regionInfo)
/**
* Create the region splits directory.
*/
void createSplitsDir() throws IOException {
void createSplitsDir(HRegionInfo daughterA, HRegionInfo daughterB) throws IOException {
[Review comment, Contributor] It is unclear to me how this is related to the rest of the changes. Could you provide some insight?

[Reply, Contributor Author] @Apache9 included this in the branch-2/master patch, so it is included here in the backport too.

[Reply, Contributor Author (@apurtell, Jun 25, 2019)] I believe this fixed an issue with an hbck test so precommit for the branch-2 version could be green. The daughter directories would not always be created. While technically an unrelated change, we allowed it there and should allow it here. Or do an addendum to break it out into a separate commit (here and in branch-2 and up).

Path splitdir = getSplitsDir();
if (fs.exists(splitdir)) {
LOG.info("The " + splitdir + " directory exists. Hence deleting it to recreate it");
if (!deleteDir(splitdir)) {
throw new IOException("Failed deletion of " + splitdir
+ " before creating them again.");
throw new IOException("Failed deletion of " + splitdir + " before creating them again.");
}
}
// splitDir doesn't exists now. No need to do an exists() call for it.
if (!createDir(splitdir)) {
throw new IOException("Failed create of " + splitdir);
}
Path daughterATmpDir = getSplitsDir(daughterA);
if (!createDir(daughterATmpDir)) {
throw new IOException("Failed create of " + daughterATmpDir);
}
Path daughterBTmpDir = getSplitsDir(daughterB);
if (!createDir(daughterBTmpDir)) {
throw new IOException("Failed create of " + daughterBTmpDir);
}
}

/**
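Following the review discussion above, a small layout sketch (paths are illustrative, and the .splits staging directory name is an assumption about the region filesystem layout) of the daughter directories that the new createSplitsDir(daughterA, daughterB) creates eagerly:

```java
import org.apache.hadoop.fs.Path;

public class SplitsDirLayoutSketch {
  public static void main(String[] args) {
    // The parent region's directory holds a staging dir with one subdirectory per
    // daughter; creating both up front avoids a missing daughter dir later in the split.
    Path parentRegionDir = new Path("hdfs://nn/hbase/data/default/t1/parentEncodedName");
    Path splitsDir = new Path(parentRegionDir, ".splits");
    System.out.println(new Path(splitsDir, "daughterAEncodedName"));
    System.out.println(new Path(splitsDir, "daughterBEncodedName"));
  }
}
```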
@@ -351,7 +351,7 @@ public PairOfSameType<Region> stepsBeforePONR(final Server server,
hri_b, std);
}

this.parent.getRegionFileSystem().createSplitsDir();
this.parent.getRegionFileSystem().createSplitsDir(hri_a, hri_b);

transition(SplitTransactionPhase.CREATE_SPLIT_DIR);

@@ -648,8 +648,9 @@ private void restoreReferenceFile(final Path familyDir, final HRegionInfo region
if (linkPath != null) {
in = HFileLink.buildFromHFileLinkPattern(conf, linkPath).open(fs);
} else {
linkPath = new Path(new Path(HRegion.getRegionDir(snapshotManifest.getSnapshotDir(),
regionInfo.getEncodedName()), familyDir.getName()), hfileName);
linkPath = new Path(new Path(new Path(snapshotManifest.getSnapshotDir(),
regionInfo.getEncodedName()),
familyDir.getName()), hfileName);
in = fs.open(linkPath);
}
OutputStream out = fs.create(outPath);
@@ -84,7 +84,6 @@
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.FSProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
@@ -708,26 +707,36 @@ public static void waitOnSafeMode(final Configuration conf,
* @return the region directory used to store WALs under the WALRootDir
* @throws IOException if there is an exception determining the WALRootDir
*/
public static Path getWALRegionDir(final Configuration conf,
final HRegionInfo regionInfo)
throws IOException {
public static Path getWALRegionDir(final Configuration conf, final HRegionInfo regionInfo)
throws IOException {
return new Path(getWALTableDir(conf, regionInfo.getTable()),
regionInfo.getEncodedName());
}

/**
* Returns the WAL region directory based on the region info
* @param conf configuration to determine WALRootDir
* @param tableName the table name
* @param encodedRegionName the encoded region name
* @return the region directory used to store WALs under the WALRootDir
* @throws IOException if there is an exception determining the WALRootDir
*/
public static Path getWALRegionDir(final Configuration conf, final TableName tableName,
final String encodedRegionName) throws IOException {
return new Path(getWALTableDir(conf, tableName), encodedRegionName);
}

/**
* Checks if meta region exists
*
* @param fs file system
* @param rootdir root directory of HBase installation
* @param rootDir root directory of HBase installation
* @return true if exists
* @throws IOException e
*/
@SuppressWarnings("deprecation")
public static boolean metaRegionExists(FileSystem fs, Path rootdir)
throws IOException {
Path metaRegionDir =
HRegion.getRegionDir(rootdir, HRegionInfo.FIRST_META_REGIONINFO);
public static boolean metaRegionExists(FileSystem fs, Path rootDir) throws IOException {
Path metaRegionDir = getRegionDirFromRootDir(rootDir, HRegionInfo.FIRST_META_REGIONINFO);
return fs.exists(metaRegionDir);
}

@@ -861,8 +870,22 @@ public static Map<String, Integer> getTableFragmentation(
*/
public static Path getWALTableDir(final Configuration conf, final TableName tableName)
throws IOException {
return new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
Path baseDir = new Path(getWALRootDir(conf), HConstants.BASE_NAMESPACE_DIR);
return new Path(new Path(baseDir, tableName.getNamespaceAsString()),
tableName.getQualifierAsString());
}

/**
* For backward compatibility with HBASE-20734, where we store recovered edits in a wrong
* directory without BASE_NAMESPACE_DIR. See HBASE-22617 for more details.
* @deprecated For compatibility, will be removed in 4.0.0.
*/
@Deprecated
public static Path getWrongWALRegionDir(final Configuration conf, final TableName tableName,
final String encodedRegionName) throws IOException {
Path wrongTableDir = new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
tableName.getQualifierAsString());
return new Path(wrongTableDir, encodedRegionName);
}

/**
@@ -1063,6 +1086,14 @@ protected boolean accept(Path p, @CheckForNull Boolean isDir) {
}
}

public static Path getRegionDirFromRootDir(Path rootDir, HRegionInfo region) {
return getRegionDirFromTableDir(getTableDir(rootDir, region.getTable()), region);
}

public static Path getRegionDirFromTableDir(Path tableDir, HRegionInfo region) {
return new Path(tableDir, ServerRegionReplicaUtil.getRegionInfoForFs(region).getEncodedName());
}

/**
* Given a particular table dir, return all the regiondirs inside it, excluding files such as
* .tableinfo
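Finally, a usage sketch (class name and encoded region name are illustrative) tying the new FSUtils helpers together: compute the correct and the legacy 'wrong' WAL region directories, then remove the latter once its recovered edits have been replayed, mirroring the HRegion change earlier in this diff:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.FSUtils;

public class WrongWALDirCleanupSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    TableName table = TableName.valueOf("t1");
    String encodedName = "5dd3d2a251fca2ace53599cbeee06e36"; // illustrative

    Path correct = FSUtils.getWALRegionDir(conf, table, encodedName);
    Path wrong = FSUtils.getWrongWALRegionDir(conf, table, encodedName); // deprecated compat helper
    System.out.println("correct = " + correct);
    System.out.println("wrong   = " + wrong);

    // Once recovered edits under the wrong dir have been replayed, delete it so it
    // does not linger, as the region open path in this patch now does.
    FileSystem walFs = correct.getFileSystem(conf);
    if (walFs.exists(wrong) && !walFs.delete(wrong, true)) {
      System.err.println("Unable to delete " + wrong);
    }
  }
}
```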