Skip to content

Commit b67c16a

Browse files
authored
HBASE-26866 Shutdown WAL may abort region server (#4254)
Signed-off-by: Xiaolin Ha <haxiaolin@apache.org>
1 parent b3f00d0 commit b67c16a

File tree

1 file changed

+20
-5
lines changed

1 file changed

+20
-5
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/AbstractFSWAL.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@
4848
import java.util.concurrent.ExecutorService;
4949
import java.util.concurrent.Executors;
5050
import java.util.concurrent.Future;
51+
import java.util.concurrent.LinkedBlockingQueue;
52+
import java.util.concurrent.ThreadPoolExecutor;
5153
import java.util.concurrent.TimeUnit;
5254
import java.util.concurrent.TimeoutException;
5355
import java.util.concurrent.atomic.AtomicBoolean;
@@ -345,8 +347,12 @@ public WalProps(Map<byte[], Long> encodedName2HighestSequenceId, long logSize) {
345347

346348
protected final AtomicBoolean rollRequested = new AtomicBoolean(false);
347349

348-
private final ExecutorService logArchiveOrShutdownExecutor = Executors.newSingleThreadExecutor(
349-
new ThreadFactoryBuilder().setDaemon(true).setNameFormat("WAL-Archive-Or-Shutdown-%d").build());
350+
// Use CallerRunsPolicy so a rejected task runs in the caller thread instead of throwing a
351+
// RejectedExecutionException and aborting the region server. This should rarely happen.
352+
private final ExecutorService logArchiveExecutor =
353+
new ThreadPoolExecutor(1, 1, 1L, TimeUnit.MINUTES, new LinkedBlockingQueue<Runnable>(),
354+
new ThreadFactoryBuilder().setDaemon(true).setNameFormat("WAL-Archive-%d").build(),
355+
new ThreadPoolExecutor.CallerRunsPolicy());
350356

351357
private final int archiveRetries;
352358

@@ -770,7 +776,7 @@ private void cleanOldLogs() throws IOException {
770776
final List<Pair<Path, Long>> localLogsToArchive = logsToArchive;
771777
// make it async
772778
for (Pair<Path, Long> log : localLogsToArchive) {
773-
logArchiveOrShutdownExecutor.execute(() -> {
779+
logArchiveExecutor.execute(() -> {
774780
archive(log);
775781
});
776782
this.walFile2Props.remove(log.getFirst());
@@ -985,7 +991,10 @@ public void shutdown() throws IOException {
985991
}
986992
}
987993

988-
Future<Void> future = logArchiveOrShutdownExecutor.submit(new Callable<Void>() {
994+
ExecutorService shutdownExecutor = Executors.newSingleThreadExecutor(
995+
new ThreadFactoryBuilder().setDaemon(true).setNameFormat("WAL-Shutdown-%d").build());
996+
997+
Future<Void> future = shutdownExecutor.submit(new Callable<Void>() {
989998
@Override
990999
public Void call() throws Exception {
9911000
if (rollWriterLock.tryLock(walShutdownTimeout, TimeUnit.SECONDS)) {
@@ -1003,7 +1012,7 @@ public Void call() throws Exception {
10031012
return null;
10041013
}
10051014
});
1006-
logArchiveOrShutdownExecutor.shutdown();
1015+
shutdownExecutor.shutdown();
10071016

10081017
try {
10091018
future.get(walShutdownTimeout, TimeUnit.MILLISECONDS);
@@ -1020,6 +1029,12 @@ public Void call() throws Exception {
10201029
} else {
10211030
throw new IOException(e.getCause());
10221031
}
1032+
} finally {
1033+
// Shutdown may call cleanOldLogs, so shut this executor down only at the very end.
1034+
// With sync replication we may shut down a WAL without shutting down the whole
1035+
// region server; if this executor were shut down earlier we could hit a
1036+
// RejectedExecutionException and abort the region server.
1037+
logArchiveExecutor.shutdown();
10231038
}
10241039
}
10251040

0 commit comments

Comments
 (0)