Skip to content

Commit e6ad8c4

Browse files
brumi1024 authored and GitHub Enterprise committed
COMPX-18909. YARN-11753. Ensure NM is marked unhealthy if the ProcessBuilder reports an issue with the container-executor (apache#7290) (apache#232)
Change-Id: Iaaa94c8f46faa4feaede27de36e0d94483ae0229
1 parent 190dfe6 commit e6ad8c4

File tree

2 files changed: 38 additions and 30 deletions

2 files changed

+38
-30
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -421,10 +421,12 @@ public void startLocalizer(LocalizerStartContext ctx)
421421
Throwable cause = e.getCause() != null ? e.getCause() : e;
422422
if (cause instanceof IOException) {
423423
IOException io = (IOException) cause;
424-
if (io.getMessage().contains("No such file or directory")) {
424+
String containerExecutorPath = getContainerExecutorExecutablePath(conf);
425+
if (io.getMessage() != null && io.getMessage().contains("Cannot run program \"" +
426+
containerExecutorPath + "\"")) {
425427
throw new ConfigurationException("Application " + appId + " initialization failed" +
426428
"(exitCode=" + exitCode + "). Container executor not found at "
427-
+ getContainerExecutorExecutablePath(conf), e);
429+
+ containerExecutorPath, e);
428430
}
429431
}
430432

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java

Lines changed: 34 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -627,15 +627,17 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
627627
when(context.getEnvironment()).thenReturn(env);
628628
Path workDir = new Path("/tmp");
629629

630+
LocalizerStartContext lsc = new LocalizerStartContext.Builder()
631+
.setNmPrivateContainerTokens(nmPrivateCTokensPath)
632+
.setNmAddr(address)
633+
.setUser(appSubmitter)
634+
.setAppId(appId.toString())
635+
.setLocId("12345")
636+
.setDirsHandler(dirService)
637+
.build();
638+
630639
try {
631-
lce.startLocalizer(new LocalizerStartContext.Builder()
632-
.setNmPrivateContainerTokens(nmPrivateCTokensPath)
633-
.setNmAddr(address)
634-
.setUser(appSubmitter)
635-
.setAppId(appId.toString())
636-
.setLocId("12345")
637-
.setDirsHandler(dirService)
638-
.build());
640+
lce.startLocalizer(lsc);
639641
Assert.fail("startLocalizer should have thrown an exception");
640642
} catch (IOException e) {
641643
assertTrue("Unexpected exception " + e,
@@ -647,50 +649,54 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
647649
LinuxContainerExecutor.ExitCode.INVALID_CONFIG_FILE.getExitCode(),
648650
};
649651

650-
for (int i = 0; i < exitCodesToThrow.length; i++) {
651-
int exitCode = exitCodesToThrow[i];
652+
for (int exitCode : exitCodesToThrow) {
652653
doThrow(new PrivilegedOperationException("invalid config", exitCode, null, null))
653654
.when(spyPrivilegedExecutor).executePrivilegedOperation(
654655
any(), any(PrivilegedOperation.class),
655656
any(), any(), anyBoolean(), anyBoolean());
656657

657658
try {
658-
lce.startLocalizer(new LocalizerStartContext.Builder()
659-
.setNmPrivateContainerTokens(nmPrivateCTokensPath)
660-
.setNmAddr(address)
661-
.setUser(appSubmitter)
662-
.setAppId(appId.toString())
663-
.setLocId("12345")
664-
.setDirsHandler(dirService)
665-
.build());
659+
lce.startLocalizer(lsc);
666660
Assert.fail("startLocalizer should have thrown a ConfigurationException");
667661
} catch (ConfigurationException e) {
668662
assertTrue("Unexpected exception " + e,
669663
e.getMessage().contains("exitCode=" + exitCode));
670664
}
671665
}
672666

667+
// Assert that we do catch an IOException thrown by the ProcessBuilder.start
668+
// method as a misconfiguration
669+
String containerExecutorPath = lce.getContainerExecutorExecutablePath(conf);
673670
doThrow(new PrivilegedOperationException("IO error",
674-
new IOException("No such file or directory")))
671+
new IOException("Cannot run program \""+ containerExecutorPath + "\"")))
675672
.when(spyPrivilegedExecutor).executePrivilegedOperation(
676673
any(), any(PrivilegedOperation.class),
677674
any(), any(), anyBoolean(), anyBoolean());
678675

679676
try {
680-
lce.startLocalizer(new LocalizerStartContext.Builder()
681-
.setNmPrivateContainerTokens(nmPrivateCTokensPath)
682-
.setNmAddr(address)
683-
.setUser(appSubmitter)
684-
.setAppId(appId.toString())
685-
.setLocId("12345")
686-
.setDirsHandler(dirService)
687-
.build());
688-
Assert.fail("startLocalizer should have thrown a ConfigurationException");
677+
lce.startLocalizer(lsc);
678+
Assert.fail("startLocalizer should have thrown an ConfigurationException");
689679
} catch (ConfigurationException e) {
690680
assertTrue("Unexpected exception " + e,
691681
e.getMessage().contains("Container executor not found"));
692682
}
693683

684+
// Assert that we do not catch every IOException as a misconfiguration
685+
doThrow(new PrivilegedOperationException("IO error",
686+
new IOException("No such file or directory")))
687+
.when(spyPrivilegedExecutor).executePrivilegedOperation(
688+
any(), any(PrivilegedOperation.class),
689+
any(), any(), anyBoolean(), anyBoolean());
690+
691+
try {
692+
lce.startLocalizer(lsc);
693+
Assert.fail("startLocalizer should have thrown an IOException");
694+
} catch (ConfigurationException e) {
695+
Assert.fail("startLocalizer should not have thrown a ConfigurationException");
696+
} catch (IOException e) {
697+
assertTrue("Unexpected exception " + e,
698+
e.getMessage().contains("exitCode"));
699+
}
694700

695701
doThrow(new PrivilegedOperationException("interrupted"))
696702
.when(spyPrivilegedExecutor).executePrivilegedOperation(

0 commit comments

Comments
 (0)