Skip to content

Commit 1fb80ef

Browse files
authored
HDFS-17250. EditLogTailer#triggerActiveLogRoll should handle thread Interrupted (#6266). Contributed by Haiyang Hu.
Reviewed-by: ZanderXu <zanderxu@apache.org> Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
1 parent 9a6d00a commit 1fb80ef

File tree

2 files changed

+102
-0
lines changed

2 files changed

+102
-0
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,8 +612,15 @@ public T call() throws IOException {
612612
private NamenodeProtocol getActiveNodeProxy() throws IOException {
613613
if (cachedActiveProxy == null) {
614614
while (true) {
615+
// If the thread is interrupted, quit by returning null.
616+
if (Thread.currentThread().isInterrupted()) {
617+
LOG.warn("Interrupted while trying to getActiveNodeProxy.");
618+
return null;
619+
}
620+
615621
// if we have reached the max loop count, quit by returning null
616622
if ((nnLoopCount / nnCount) >= maxRetries) {
623+
LOG.warn("Have reached the max loop count ({}).", nnLoopCount);
617624
return null;
618625
}
619626

@@ -638,4 +645,24 @@ private NamenodeProtocol getActiveNodeProxy() throws IOException {
638645
return cachedActiveProxy;
639646
}
640647
}
648+
649+
@VisibleForTesting
650+
public NamenodeProtocol getCachedActiveProxy() {
651+
return cachedActiveProxy;
652+
}
653+
654+
@VisibleForTesting
655+
public long getLastRollTimeMs() {
656+
return lastRollTimeMs;
657+
}
658+
659+
@VisibleForTesting
660+
public RemoteNameNodeInfo getCurrentNN() {
661+
return currentNN;
662+
}
663+
664+
@VisibleForTesting
665+
public void setShouldRunForTest(boolean shouldRun) {
666+
this.tailerThread.setShouldRun(shouldRun);
667+
}
641668
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
import java.io.File;
2727
import java.io.IOException;
28+
import java.io.InterruptedIOException;
2829
import java.net.BindException;
2930
import java.net.URI;
3031
import java.util.ArrayList;
@@ -462,6 +463,80 @@ public void testStandbyTriggersLogRollsWhenTailInProgressEdits()
462463
}
463464
}
464465

466+
@Test
467+
public void testRollEditLogHandleThreadInterruption()
468+
throws IOException, InterruptedException, TimeoutException {
469+
Configuration conf = getConf();
470+
// RollEdits timeout 1s.
471+
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_ROLLEDITS_TIMEOUT_KEY, 1);
472+
473+
MiniDFSCluster cluster = null;
474+
try {
475+
cluster = createMiniDFSCluster(conf, 3);
476+
cluster.transitionToActive(2);
477+
EditLogTailer tailer = Mockito.spy(
478+
cluster.getNamesystem(0).getEditLogTailer());
479+
480+
// Stop the edit log tail thread for testing.
481+
tailer.setShouldRunForTest(false);
482+
483+
final AtomicInteger invokedTimes = new AtomicInteger(0);
484+
485+
// For nn0 run triggerActiveLogRoll, nns is [nn1,nn2].
486+
// Mock the NameNodeProxy for testing.
487+
// An InterruptedIOException will be thrown when requesting to nn1.
488+
when(tailer.getNameNodeProxy()).thenReturn(
489+
tailer.new MultipleNameNodeProxy<Void>() {
490+
@Override
491+
protected Void doWork() throws IOException {
492+
invokedTimes.getAndIncrement();
493+
if (tailer.getCurrentNN().getNameNodeID().equals("nn1")) {
494+
while (true) {
495+
Thread.yield();
496+
if (Thread.currentThread().isInterrupted()) {
497+
throw new InterruptedIOException("It is an Interrupted IOException.");
498+
}
499+
}
500+
} else {
501+
tailer.getCachedActiveProxy().rollEditLog();
502+
return null;
503+
}
504+
}
505+
}
506+
);
507+
508+
// Record the initial LastRollTimeMs value.
509+
// This time will be updated only when triggerActiveLogRoll is executed successfully.
510+
long initLastRollTimeMs = tailer.getLastRollTimeMs();
511+
512+
// Execute triggerActiveLogRoll for the first time.
513+
// The MultipleNameNodeProxy uses round-robin to look for an active NN to roll the edit log.
514+
// Here, a request will be made to nn1, and the main thread will trigger a Timeout and
515+
// the doWork() method will throw an InterruptedIOException.
516+
// The getActiveNodeProxy() method will determine that the thread is interrupted
517+
// and will return null.
518+
tailer.triggerActiveLogRoll();
519+
520+
// Execute triggerActiveLogRoll for the second time.
521+
// A request will be made to nn2 and the rollEditLog will be successfully finished and
522+
// lastRollTimeMs will be updated.
523+
tailer.triggerActiveLogRoll();
524+
GenericTestUtils.waitFor(new Supplier<Boolean>() {
525+
@Override
526+
public Boolean get() {
527+
return tailer.getLastRollTimeMs() > initLastRollTimeMs;
528+
}
529+
}, 100, 10000);
530+
531+
// The total number of invoked times should be 2.
532+
assertEquals(2, invokedTimes.get());
533+
} finally {
534+
if (cluster != null) {
535+
cluster.shutdown();
536+
}
537+
}
538+
}
539+
465540
private static void waitForStandbyToCatchUpWithInProgressEdits(
466541
final NameNode standby, final long activeTxId,
467542
int maxWaitSec) throws Exception {

0 commit comments

Comments
 (0)