Skip to content

Commit 007eacc

Browse files
sunhelly authored and Apache9 committed
HBASE-26526 Introduce a timeout to shutdown of WAL (#3297)
Signed-off-by: Andrew Purtell <apurtell@apache.org>
(cherry picked from commit ca3ba49)
1 parent 39c4d01 commit 007eacc

File tree

1 file changed

+49
-13
lines changed

1 file changed

+49
-13
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AbstractFSWAL.java

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,17 @@
3737
import java.util.Map;
3838
import java.util.OptionalLong;
3939
import java.util.Set;
40+
import java.util.concurrent.Callable;
4041
import java.util.concurrent.ConcurrentHashMap;
4142
import java.util.concurrent.ConcurrentNavigableMap;
4243
import java.util.concurrent.ConcurrentSkipListMap;
4344
import java.util.concurrent.CopyOnWriteArrayList;
4445
import java.util.concurrent.ExecutionException;
4546
import java.util.concurrent.ExecutorService;
4647
import java.util.concurrent.Executors;
48+
import java.util.concurrent.Future;
4749
import java.util.concurrent.TimeUnit;
50+
import java.util.concurrent.TimeoutException;
4851
import java.util.concurrent.atomic.AtomicBoolean;
4952
import java.util.concurrent.atomic.AtomicInteger;
5053
import java.util.concurrent.atomic.AtomicLong;
@@ -142,6 +145,9 @@ public abstract class AbstractFSWAL<W extends WriterBase> implements WAL {
142145
public static final String RING_BUFFER_SLOT_COUNT =
143146
"hbase.regionserver.wal.disruptor.event.count";
144147

148+
public static final String WAL_SHUTDOWN_WAIT_TIMEOUT_MS = "hbase.wal.shutdown.wait.timeout.ms";
149+
public static final int DEFAULT_WAL_SHUTDOWN_WAIT_TIMEOUT_MS = 15 * 1000;
150+
145151
/**
146152
* file system instance
147153
*/
@@ -269,6 +275,9 @@ public abstract class AbstractFSWAL<W extends WriterBase> implements WAL {
269275
protected volatile boolean closed = false;
270276

271277
protected final AtomicBoolean shutdown = new AtomicBoolean(false);
278+
279+
protected final long walShutdownTimeout;
280+
272281
/**
273282
* WAL Comparator; it compares the timestamp (log filenum), present in the log file name. Throws
274283
* an IllegalArgumentException if used to compare paths from different wals.
@@ -320,8 +329,8 @@ public WalProps(Map<byte[], Long> encodedName2HighestSequenceId, long logSize) {
320329

321330
protected final AtomicBoolean rollRequested = new AtomicBoolean(false);
322331

323-
private final ExecutorService logArchiveExecutor = Executors.newSingleThreadExecutor(
324-
new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Log-Archiver-%d").build());
332+
private final ExecutorService logArchiveOrShutdownExecutor = Executors.newSingleThreadExecutor(
333+
new ThreadFactoryBuilder().setDaemon(true).setNameFormat("WAL-Archive-Or-Shutdown-%d").build());
325334

326335
private final int archiveRetries;
327336

@@ -478,7 +487,9 @@ public boolean accept(final Path fileName) {
478487
this.syncFutureCache = new SyncFutureCache(conf);
479488
this.implClassName = getClass().getSimpleName();
480489
this.useHsync = conf.getBoolean(HRegion.WAL_HSYNC_CONF_KEY, HRegion.DEFAULT_WAL_HSYNC);
481-
archiveRetries = this.conf.getInt("hbase.regionserver.logroll.archive.retries", 0);
490+
archiveRetries = this.conf.getInt("hbase.regionserver.walroll.archive.retries", 0);
491+
this.walShutdownTimeout =
492+
conf.getLong(WAL_SHUTDOWN_WAIT_TIMEOUT_MS, DEFAULT_WAL_SHUTDOWN_WAIT_TIMEOUT_MS);
482493
}
483494

484495
/**
@@ -685,7 +696,7 @@ private void cleanOldLogs() throws IOException {
685696
final List<Pair<Path, Long>> localLogsToArchive = logsToArchive;
686697
// make it async
687698
for (Pair<Path, Long> log : localLogsToArchive) {
688-
logArchiveExecutor.execute(() -> {
699+
logArchiveOrShutdownExecutor.execute(() -> {
689700
archive(log);
690701
});
691702
this.walFile2Props.remove(log.getFirst());
@@ -891,17 +902,42 @@ public void shutdown() throws IOException {
891902
i.logCloseRequested();
892903
}
893904
}
894-
rollWriterLock.lock();
895-
try {
896-
doShutdown();
897-
if (syncFutureCache != null) {
898-
syncFutureCache.clear();
905+
906+
Future<Void> future = logArchiveOrShutdownExecutor.submit(new Callable<Void>() {
907+
@Override
908+
public Void call() throws Exception {
909+
if (rollWriterLock.tryLock(walShutdownTimeout, TimeUnit.SECONDS)) {
910+
try {
911+
doShutdown();
912+
if (syncFutureCache != null) {
913+
syncFutureCache.clear();
914+
}
915+
} finally {
916+
rollWriterLock.unlock();
917+
}
918+
} else {
919+
throw new IOException("Waiting for rollWriterLock timeout");
920+
}
921+
return null;
899922
}
900-
if (logArchiveExecutor != null) {
901-
logArchiveExecutor.shutdownNow();
923+
});
924+
logArchiveOrShutdownExecutor.shutdown();
925+
926+
try {
927+
future.get(walShutdownTimeout, TimeUnit.MILLISECONDS);
928+
} catch (InterruptedException e) {
929+
throw new InterruptedIOException("Interrupted when waiting for shutdown WAL");
930+
} catch (TimeoutException e) {
931+
throw new TimeoutIOException("We have waited " + walShutdownTimeout + "ms, but"
932+
+ " the shutdown of WAL doesn't complete! Please check the status of underlying "
933+
+ "filesystem or increase the wait time by the config \"" + WAL_SHUTDOWN_WAIT_TIMEOUT_MS
934+
+ "\"", e);
935+
} catch (ExecutionException e) {
936+
if (e.getCause() instanceof IOException) {
937+
throw (IOException) e.getCause();
938+
} else {
939+
throw new IOException(e.getCause());
902940
}
903-
} finally {
904-
rollWriterLock.unlock();
905941
}
906942
}
907943

0 commit comments

Comments
 (0)