Skip to content

Commit 213ea03

Browse files
KevinWikantKevin Wikant
andauthored
YARN-11210. Fix YARN RMAdminCLI retry logic for non-retryable kerbero… (#4563)
Co-authored-by: Kevin Wikant <wikak@amazon.com>
1 parent 01a2e0f commit 213ea03

File tree

6 files changed

+31
-15
lines changed

6 files changed

+31
-15
lines changed

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -181,15 +181,20 @@ public static final RetryPolicy retryByRemoteException(
181181
}
182182

183183
/**
184-
* A retry policy for exceptions other than RemoteException.
184+
* <p>
185+
* A retry policy where RemoteException and SaslException are not retried, other individual
186+
* exception types can have RetryPolicy overrides, and any other exception type without an
187+
* override is not retried.
188+
* </p>
189+
*
185190
* @param defaultPolicy defaultPolicy.
186191
* @param exceptionToPolicyMap exceptionToPolicyMap.
187192
* @return RetryPolicy.
188193
*/
189-
public static final RetryPolicy retryOtherThanRemoteException(
194+
public static final RetryPolicy retryOtherThanRemoteAndSaslException(
190195
RetryPolicy defaultPolicy,
191196
Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap) {
192-
return new OtherThanRemoteExceptionDependentRetry(defaultPolicy,
197+
return new OtherThanRemoteAndSaslExceptionDependentRetry(defaultPolicy,
193198
exceptionToPolicyMap);
194199
}
195200

@@ -589,12 +594,12 @@ public RetryAction shouldRetry(Exception e, int retries, int failovers,
589594
}
590595
}
591596

592-
static class OtherThanRemoteExceptionDependentRetry implements RetryPolicy {
597+
static class OtherThanRemoteAndSaslExceptionDependentRetry implements RetryPolicy {
593598

594599
private RetryPolicy defaultPolicy;
595600
private Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap;
596601

597-
public OtherThanRemoteExceptionDependentRetry(RetryPolicy defaultPolicy,
602+
OtherThanRemoteAndSaslExceptionDependentRetry(RetryPolicy defaultPolicy,
598603
Map<Class<? extends Exception>,
599604
RetryPolicy> exceptionToPolicyMap) {
600605
this.defaultPolicy = defaultPolicy;
@@ -605,10 +610,8 @@ public OtherThanRemoteExceptionDependentRetry(RetryPolicy defaultPolicy,
605610
public RetryAction shouldRetry(Exception e, int retries, int failovers,
606611
boolean isIdempotentOrAtMostOnce) throws Exception {
607612
RetryPolicy policy = null;
608-
// ignore Remote Exception
609-
if (e instanceof RemoteException) {
610-
// do nothing
611-
} else {
613+
// ignore RemoteException and SaslException
614+
if (!(e instanceof RemoteException || isSaslFailure(e))) {
612615
policy = exceptionToPolicyMap.get(e.getClass());
613616
}
614617
if (policy == null) {

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161

6262
import javax.net.SocketFactory;
6363
import javax.security.sasl.Sasl;
64+
import javax.security.sasl.SaslException;
6465
import java.io.*;
6566
import java.net.*;
6667
import java.nio.ByteBuffer;
@@ -1620,7 +1621,8 @@ private Writable getRpcResponse(final Call call, final Connection connection,
16201621
}
16211622

16221623
if (call.error != null) {
1623-
if (call.error instanceof RemoteException) {
1624+
if (call.error instanceof RemoteException ||
1625+
call.error instanceof SaslException) {
16241626
call.error.fillInStackTrace();
16251627
throw call.error;
16261628
} else { // local exception

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,14 @@ private SaslClient createSaslClient(SaslAuth authType)
237237
LOG.debug("client isn't using kerberos");
238238
return null;
239239
}
240-
String serverPrincipal = getServerPrincipal(authType);
240+
final String serverPrincipal;
241+
try {
242+
serverPrincipal = getServerPrincipal(authType);
243+
} catch (IllegalArgumentException ex) {
244+
// YARN-11210: getServerPrincipal can throw IllegalArgumentException if Kerberos
245+
// configuration is bad, this is surfaced as a non-retryable SaslException
246+
throw new SaslException("Bad Kerberos server principal configuration", ex);
247+
}
241248
if (serverPrincipal == null) {
242249
LOG.debug("protocol doesn't use kerberos");
243250
return null;

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ public void testRetryOtherThanRemoteException() throws Throwable {
291291

292292
UnreliableInterface unreliable = (UnreliableInterface)
293293
RetryProxy.create(UnreliableInterface.class, unreliableImpl,
294-
retryOtherThanRemoteException(TRY_ONCE_THEN_FAIL,
294+
retryOtherThanRemoteAndSaslException(TRY_ONCE_THEN_FAIL,
295295
exceptionToPolicyMap));
296296
// should retry with local IOException.
297297
unreliable.failsOnceWithIOException();

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,12 @@ public static boolean isFederationEnabled(Configuration conf) {
6464
* configuration; else false.
6565
*/
6666
public static boolean isFederationFailoverEnabled(Configuration conf) {
67-
return conf.getBoolean(YarnConfiguration.FEDERATION_FAILOVER_ENABLED,
68-
YarnConfiguration.DEFAULT_FEDERATION_FAILOVER_ENABLED);
67+
// Federation failover is not enabled unless federation is enabled. This previously caused
68+
// YARN RMProxy to use the HA Retry policy in a non-HA & non-federation environments because
69+
// the default federation failover enabled value is true.
70+
return isFederationEnabled(conf) &&
71+
conf.getBoolean(YarnConfiguration.FEDERATION_FAILOVER_ENABLED,
72+
YarnConfiguration.DEFAULT_FEDERATION_FAILOVER_ENABLED);
6973
}
7074

7175
/**

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ protected static RetryPolicy createRetryPolicy(Configuration conf,
300300
// YARN-4288: local IOException is also possible.
301301
exceptionToPolicyMap.put(IOException.class, retryPolicy);
302302
// Not retry on remote IO exception.
303-
return RetryPolicies.retryOtherThanRemoteException(
303+
return RetryPolicies.retryOtherThanRemoteAndSaslException(
304304
RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
305305
}
306306
}

0 commit comments

Comments
 (0)