Skip to content

Commit a172b48

Browse files
committed
HBASE-22617 Recovered WAL directories not getting cleaned up (#330)
Signed-off-by: Guanghao Zhang <zghao@apache.org>
Signed-off-by: Andrew Purtell <apurtell@apache.org>
1 parent a1aab95 commit a172b48

File tree

18 files changed

+244
-169
lines changed

18 files changed

+244
-169
lines changed

hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
import java.util.Locale;
2929
import java.util.Map;
3030
import java.util.concurrent.ConcurrentHashMap;
31-
3231
import org.apache.hadoop.HadoopIllegalArgumentException;
3332
import org.apache.hadoop.conf.Configuration;
3433
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -429,11 +428,9 @@ private static boolean isValidWALRootDir(Path walDir, final Configuration c) thr
429428
* @return the region directory used to store WALs under the WALRootDir
430429
* @throws IOException if there is an exception determining the WALRootDir
431430
*/
432-
public static Path getWALRegionDir(final Configuration conf,
433-
final TableName tableName, final String encodedRegionName)
434-
throws IOException {
435-
return new Path(getWALTableDir(conf, tableName),
436-
encodedRegionName);
431+
public static Path getWALRegionDir(final Configuration conf, final TableName tableName,
432+
final String encodedRegionName) throws IOException {
433+
return new Path(getWALTableDir(conf, tableName), encodedRegionName);
437434
}
438435

439436
/**
@@ -445,8 +442,22 @@ public static Path getWALRegionDir(final Configuration conf,
445442
*/
446443
public static Path getWALTableDir(final Configuration conf, final TableName tableName)
447444
throws IOException {
448-
return new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
449-
tableName.getQualifierAsString());
445+
Path baseDir = new Path(getWALRootDir(conf), HConstants.BASE_NAMESPACE_DIR);
446+
return new Path(new Path(baseDir, tableName.getNamespaceAsString()),
447+
tableName.getQualifierAsString());
448+
}
449+
450+
/**
451+
* For backward compatibility with HBASE-20734, where we store recovered edits in a wrong
452+
* directory without BASE_NAMESPACE_DIR. See HBASE-22617 for more details.
453+
* @deprecated For compatibility, will be removed in 4.0.0.
454+
*/
455+
@Deprecated
456+
public static Path getWrongWALRegionDir(final Configuration conf, final TableName tableName,
457+
final String encodedRegionName) throws IOException {
458+
Path wrongTableDir = new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
459+
tableName.getQualifierAsString());
460+
return new Path(wrongTableDir, encodedRegionName);
450461
}
451462

452463
/**
@@ -1059,5 +1070,4 @@ public StreamLacksCapabilityException(String message) {
10591070
super(message);
10601071
}
10611072
}
1062-
10631073
}

hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
import org.apache.hadoop.fs.Path;
3939
import org.apache.hadoop.fs.PathFilter;
4040
import org.apache.hadoop.hbase.client.RegionInfo;
41-
import org.apache.hadoop.hbase.regionserver.HRegion;
4241
import org.apache.hadoop.hbase.regionserver.HStoreFile;
4342
import org.apache.hadoop.hbase.util.Bytes;
4443
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -87,23 +86,21 @@ private HFileArchiver() {
8786
public static boolean exists(Configuration conf, FileSystem fs, RegionInfo info)
8887
throws IOException {
8988
Path rootDir = FSUtils.getRootDir(conf);
90-
Path regionDir = HRegion.getRegionDir(rootDir, info);
89+
Path regionDir = FSUtils.getRegionDirFromRootDir(rootDir, info);
9190
return fs.exists(regionDir);
9291
}
9392

9493
/**
95-
* Cleans up all the files for a HRegion by archiving the HFiles to the
96-
* archive directory
94+
* Cleans up all the files for a HRegion by archiving the HFiles to the archive directory
9795
* @param conf the configuration to use
9896
* @param fs the file system object
9997
* @param info RegionInfo for region to be deleted
100-
* @throws IOException
10198
*/
10299
public static void archiveRegion(Configuration conf, FileSystem fs, RegionInfo info)
103100
throws IOException {
104101
Path rootDir = FSUtils.getRootDir(conf);
105102
archiveRegion(fs, rootDir, FSUtils.getTableDir(rootDir, info.getTable()),
106-
HRegion.getRegionDir(rootDir, info));
103+
FSUtils.getRegionDirFromRootDir(rootDir, info));
107104
}
108105

109106
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,9 @@ public FileSystem getFileSystem() {
194194
return this.fs;
195195
}
196196

197-
protected FileSystem getWALFileSystem() { return this.walFs; }
197+
public FileSystem getWALFileSystem() {
198+
return this.walFs;
199+
}
198200

199201
public Configuration getConfiguration() {
200202
return this.conf;
@@ -220,7 +222,7 @@ public Path getWALRootDir() {
220222
* @return the directory for a give {@code region}.
221223
*/
222224
public Path getRegionDir(RegionInfo region) {
223-
return FSUtils.getRegionDir(FSUtils.getTableDir(getRootDir(), region.getTable()), region);
225+
return FSUtils.getRegionDirFromRootDir(getRootDir(), region);
224226
}
225227

226228
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCRegionProcedure.java

Lines changed: 55 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,26 @@
1818
package org.apache.hadoop.hbase.master.assignment;
1919

2020
import java.io.IOException;
21-
2221
import org.apache.hadoop.fs.FileSystem;
22+
import org.apache.hadoop.fs.Path;
2323
import org.apache.hadoop.hbase.MetaTableAccessor;
2424
import org.apache.hadoop.hbase.backup.HFileArchiver;
2525
import org.apache.hadoop.hbase.client.RegionInfo;
2626
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
27+
import org.apache.hadoop.hbase.master.MasterFileSystem;
2728
import org.apache.hadoop.hbase.master.MasterServices;
2829
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
2930
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
3031
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
3132
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
3233
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
34+
import org.apache.hadoop.hbase.util.FSUtils;
3335
import org.apache.yetus.audience.InterfaceAudience;
3436
import org.slf4j.Logger;
3537
import org.slf4j.LoggerFactory;
38+
3639
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
40+
3741
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
3842
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
3943
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.GCRegionState;
@@ -64,46 +68,65 @@ public TableOperationType getTableOperationType() {
6468

6569
@Override
6670
protected Flow executeFromState(MasterProcedureEnv env, GCRegionState state)
67-
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
71+
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
6872
if (LOG.isTraceEnabled()) {
6973
LOG.trace(this + " execute state=" + state);
7074
}
7175
MasterServices masterServices = env.getMasterServices();
7276
try {
7377
switch (state) {
74-
case GC_REGION_PREPARE:
75-
// Nothing to do to prepare.
76-
setNextState(GCRegionState.GC_REGION_ARCHIVE);
77-
break;
78-
case GC_REGION_ARCHIVE:
79-
FileSystem fs = masterServices.getMasterFileSystem().getFileSystem();
80-
if (HFileArchiver.exists(masterServices.getConfiguration(), fs, getRegion())) {
81-
if (LOG.isDebugEnabled()) LOG.debug("Archiving region=" + getRegion().getShortNameToLog());
82-
HFileArchiver.archiveRegion(masterServices.getConfiguration(), fs, getRegion());
83-
}
84-
setNextState(GCRegionState.GC_REGION_PURGE_METADATA);
85-
break;
86-
case GC_REGION_PURGE_METADATA:
87-
// TODO: Purge metadata before removing from HDFS? This ordering is copied
88-
// from CatalogJanitor.
89-
AssignmentManager am = masterServices.getAssignmentManager();
90-
if (am != null) {
91-
if (am.getRegionStates() != null) {
92-
am.getRegionStates().deleteRegion(getRegion());
78+
case GC_REGION_PREPARE:
79+
// Nothing to do to prepare.
80+
setNextState(GCRegionState.GC_REGION_ARCHIVE);
81+
break;
82+
case GC_REGION_ARCHIVE:
83+
MasterFileSystem mfs = masterServices.getMasterFileSystem();
84+
FileSystem fs = mfs.getFileSystem();
85+
if (HFileArchiver.exists(masterServices.getConfiguration(), fs, getRegion())) {
86+
if (LOG.isDebugEnabled()) {
87+
LOG.debug("Archiving region=" + getRegion().getShortNameToLog());
88+
}
89+
HFileArchiver.archiveRegion(masterServices.getConfiguration(), fs, getRegion());
90+
}
91+
FileSystem walFs = mfs.getWALFileSystem();
92+
// Cleanup the directories on WAL filesystem also
93+
Path regionWALDir = FSUtils.getWALRegionDir(env.getMasterConfiguration(),
94+
getRegion().getTable(), getRegion().getEncodedName());
95+
if (walFs.exists(regionWALDir)) {
96+
if (!walFs.delete(regionWALDir, true)) {
97+
LOG.debug("Failed to delete {}", regionWALDir);
98+
}
99+
}
100+
Path wrongRegionWALDir = FSUtils.getWrongWALRegionDir(env.getMasterConfiguration(),
101+
getRegion().getTable(), getRegion().getEncodedName());
102+
if (walFs.exists(wrongRegionWALDir)) {
103+
if (!walFs.delete(wrongRegionWALDir, true)) {
104+
LOG.debug("Failed to delete {}", regionWALDir);
105+
}
106+
}
107+
setNextState(GCRegionState.GC_REGION_PURGE_METADATA);
108+
break;
109+
case GC_REGION_PURGE_METADATA:
110+
// TODO: Purge metadata before removing from HDFS? This ordering is copied
111+
// from CatalogJanitor.
112+
AssignmentManager am = masterServices.getAssignmentManager();
113+
if (am != null) {
114+
if (am.getRegionStates() != null) {
115+
am.getRegionStates().deleteRegion(getRegion());
116+
}
117+
}
118+
MetaTableAccessor.deleteRegion(masterServices.getConnection(), getRegion());
119+
masterServices.getServerManager().removeRegion(getRegion());
120+
FavoredNodesManager fnm = masterServices.getFavoredNodesManager();
121+
if (fnm != null) {
122+
fnm.deleteFavoredNodesForRegions(Lists.newArrayList(getRegion()));
93123
}
94-
}
95-
MetaTableAccessor.deleteRegion(masterServices.getConnection(), getRegion());
96-
masterServices.getServerManager().removeRegion(getRegion());
97-
FavoredNodesManager fnm = masterServices.getFavoredNodesManager();
98-
if (fnm != null) {
99-
fnm.deleteFavoredNodesForRegions(Lists.newArrayList(getRegion()));
100-
}
101-
return Flow.NO_MORE_STATE;
102-
default:
103-
throw new UnsupportedOperationException(this + " unhandled state=" + state);
124+
return Flow.NO_MORE_STATE;
125+
default:
126+
throw new UnsupportedOperationException(this + " unhandled state=" + state);
104127
}
105128
} catch (IOException ioe) {
106-
// TODO: This is going to spew log?
129+
// TODO: This is going to spew log? Add retry backoff
107130
LOG.warn("Error trying to GC " + getRegion().getShortNameToLog() + "; retrying...", ioe);
108131
}
109132
return Flow.HAS_MORE_STATE;

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -843,16 +843,16 @@ private ServerName getServerName(final MasterProcedureEnv env) {
843843
}
844844

845845
private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
846-
FileSystem walFS = env.getMasterServices().getMasterWalManager().getFileSystem();
846+
MasterFileSystem fs = env.getMasterFileSystem();
847847
long maxSequenceId = -1L;
848848
for (RegionInfo region : regionsToMerge) {
849849
maxSequenceId =
850-
Math.max(maxSequenceId, WALSplitter.getMaxRegionSequenceId(
851-
walFS, getWALRegionDir(env, region)));
850+
Math.max(maxSequenceId, WALSplitter.getMaxRegionSequenceId(env.getMasterConfiguration(),
851+
region, fs::getFileSystem, fs::getWALFileSystem));
852852
}
853853
if (maxSequenceId > 0) {
854-
WALSplitter.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, mergedRegion),
855-
maxSequenceId);
854+
WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(),
855+
getWALRegionDir(env, mergedRegion), maxSequenceId);
856856
}
857857
}
858858

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import java.io.IOException;
2121
import java.util.Collections;
2222
import java.util.List;
23-
import org.apache.hadoop.fs.FileSystem;
2423
import org.apache.hadoop.hbase.Cell;
2524
import org.apache.hadoop.hbase.CellBuilderFactory;
2625
import org.apache.hadoop.hbase.CellBuilderType;
@@ -35,13 +34,13 @@
3534
import org.apache.hadoop.hbase.client.Result;
3635
import org.apache.hadoop.hbase.client.Table;
3736
import org.apache.hadoop.hbase.client.TableDescriptor;
37+
import org.apache.hadoop.hbase.master.MasterFileSystem;
3838
import org.apache.hadoop.hbase.master.MasterServices;
3939
import org.apache.hadoop.hbase.master.RegionState.State;
4040
import org.apache.hadoop.hbase.procedure2.Procedure;
4141
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
4242
import org.apache.hadoop.hbase.util.Bytes;
4343
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
44-
import org.apache.hadoop.hbase.util.FSUtils;
4544
import org.apache.hadoop.hbase.wal.WALSplitter;
4645
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
4746
import org.apache.yetus.audience.InterfaceAudience;
@@ -217,10 +216,9 @@ private void updateRegionLocation(RegionInfo regionInfo, State state, Put put)
217216
}
218217

219218
private long getOpenSeqNumForParentRegion(RegionInfo region) throws IOException {
220-
FileSystem walFS = master.getMasterWalManager().getFileSystem();
221-
long maxSeqId =
222-
WALSplitter.getMaxRegionSequenceId(walFS, FSUtils.getWALRegionDir(
223-
master.getConfiguration(), region.getTable(), region.getEncodedName()));
219+
MasterFileSystem fs = master.getMasterFileSystem();
220+
long maxSeqId = WALSplitter.getMaxRegionSequenceId(master.getConfiguration(), region,
221+
fs::getFileSystem, fs::getWALFileSystem);
224222
return maxSeqId > 0 ? maxSeqId + 1 : HConstants.NO_SEQNUM;
225223
}
226224

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
import java.util.concurrent.Future;
3535
import java.util.concurrent.TimeUnit;
3636
import java.util.stream.Collectors;
37-
3837
import org.apache.hadoop.conf.Configuration;
3938
import org.apache.hadoop.fs.FileSystem;
4039
import org.apache.hadoop.fs.Path;
@@ -596,7 +595,7 @@ public void createDaughterRegions(final MasterProcedureEnv env) throws IOExcepti
596595
final FileSystem fs = mfs.getFileSystem();
597596
HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
598597
env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false);
599-
regionFs.createSplitsDir();
598+
regionFs.createSplitsDir(daughter_1_RI, daughter_2_RI);
600599

601600
Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);
602601

@@ -903,14 +902,14 @@ private int getRegionReplication(final MasterProcedureEnv env) throws IOExceptio
903902
}
904903

905904
private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException {
906-
FileSystem walFS = env.getMasterServices().getMasterWalManager().getFileSystem();
907-
long maxSequenceId =
908-
WALSplitter.getMaxRegionSequenceId(walFS, getWALRegionDir(env, getParentRegion()));
905+
MasterFileSystem fs = env.getMasterFileSystem();
906+
long maxSequenceId = WALSplitter.getMaxRegionSequenceId(env.getMasterConfiguration(),
907+
getParentRegion(), fs::getFileSystem, fs::getWALFileSystem);
909908
if (maxSequenceId > 0) {
910-
WALSplitter.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, daughter_1_RI),
911-
maxSequenceId);
912-
WALSplitter.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, daughter_2_RI),
913-
maxSequenceId);
909+
WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(),
910+
getWALRegionDir(env, daughter_1_RI), maxSequenceId);
911+
WALSplitter.writeRegionSequenceIdFile(fs.getWALFileSystem(),
912+
getWALRegionDir(env, daughter_2_RI), maxSequenceId);
914913
}
915914
}
916915

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -314,12 +314,11 @@ protected static void deleteFromFs(final MasterProcedureEnv env,
314314

315315
// Archive regions from FS (temp directory)
316316
if (archive) {
317-
List<Path> regionDirList = regions.stream()
318-
.filter(RegionReplicaUtil::isDefaultReplica)
319-
.map(region -> FSUtils.getRegionDir(tempTableDir, region))
317+
List<Path> regionDirList = regions.stream().filter(RegionReplicaUtil::isDefaultReplica)
318+
.map(region -> FSUtils.getRegionDirFromTableDir(tempTableDir, region))
320319
.collect(Collectors.toList());
321-
HFileArchiver.archiveRegions(env.getMasterConfiguration(), fs, mfs.getRootDir(),
322-
tempTableDir, regionDirList);
320+
HFileArchiver.archiveRegions(env.getMasterConfiguration(), fs, mfs.getRootDir(), tempTableDir,
321+
regionDirList);
323322
LOG.debug("Table '{}' archived!", tableName);
324323
}
325324

@@ -343,6 +342,13 @@ protected static void deleteFromFs(final MasterProcedureEnv env,
343342
throw new IOException("Couldn't delete mob dir " + mobTableDir);
344343
}
345344
}
345+
346+
// Delete the directory on wal filesystem
347+
FileSystem walFs = mfs.getWALFileSystem();
348+
Path tableWALDir = FSUtils.getWALTableDir(env.getMasterConfiguration(), tableName);
349+
if (walFs.exists(tableWALDir) && !walFs.delete(tableWALDir, true)) {
350+
throw new IOException("Couldn't delete table dir on wal filesystem" + tableWALDir);
351+
}
346352
}
347353

348354
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
package org.apache.hadoop.hbase.master.procedure;
2020

2121
import java.io.IOException;
22-
import org.apache.hadoop.fs.FileSystem;
2322
import org.apache.hadoop.hbase.HBaseIOException;
2423
import org.apache.hadoop.hbase.HConstants;
2524
import org.apache.hadoop.hbase.MetaTableAccessor;
@@ -31,6 +30,7 @@
3130
import org.apache.hadoop.hbase.client.TableState;
3231
import org.apache.hadoop.hbase.constraint.ConstraintException;
3332
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
33+
import org.apache.hadoop.hbase.master.MasterFileSystem;
3434
import org.apache.hadoop.hbase.master.TableStateManager;
3535
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
3636
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -113,13 +113,13 @@ protected Flow executeFromState(final MasterProcedureEnv env, final DisableTable
113113
case DISABLE_TABLE_ADD_REPLICATION_BARRIER:
114114
if (env.getMasterServices().getTableDescriptors().get(tableName)
115115
.hasGlobalReplicationScope()) {
116-
FileSystem walFS = env.getMasterServices().getMasterWalManager().getFileSystem();
116+
MasterFileSystem fs = env.getMasterFileSystem();
117117
try (BufferedMutator mutator = env.getMasterServices().getConnection()
118118
.getBufferedMutator(TableName.META_TABLE_NAME)) {
119119
for (RegionInfo region : env.getAssignmentManager().getRegionStates()
120120
.getRegionsOfTable(tableName)) {
121-
long maxSequenceId =
122-
WALSplitter.getMaxRegionSequenceId(walFS, getWALRegionDir(env, region));
121+
long maxSequenceId = WALSplitter.getMaxRegionSequenceId(
122+
env.getMasterConfiguration(), region, fs::getFileSystem, fs::getWALFileSystem);
123123
long openSeqNum = maxSequenceId > 0 ? maxSequenceId + 1 : HConstants.NO_SEQNUM;
124124
mutator.mutate(MetaTableAccessor.makePutForReplicationBarrier(region, openSeqNum,
125125
EnvironmentEdgeManager.currentTime()));

0 commit comments

Comments (0)