@@ -227,35 +227,29 @@ protected ApplicationClientProtocol getClientRMProxyForSubCluster(
227
227
ApplicationClientProtocol clientRMProxy = null ;
228
228
try {
229
229
boolean serviceAuthEnabled = getConf ().getBoolean (
230
- CommonConfigurationKeys .HADOOP_SECURITY_AUTHORIZATION , false );
230
+ CommonConfigurationKeys .HADOOP_SECURITY_AUTHORIZATION , false );
231
231
UserGroupInformation realUser = user ;
232
232
if (serviceAuthEnabled ) {
233
233
realUser = UserGroupInformation .createProxyUser (
234
- user .getShortUserName (), UserGroupInformation .getLoginUser ());
234
+ user .getShortUserName (), UserGroupInformation .getLoginUser ());
235
235
}
236
236
clientRMProxy = FederationProxyProviderUtil .createRMProxy (getConf (),
237
237
ApplicationClientProtocol .class , subClusterId , realUser );
238
238
} catch (Exception e ) {
239
239
RouterServerUtil .logAndThrowException (
240
- "Unable to create the interface to reach the SubCluster "
241
- + subClusterId ,
242
- e );
240
+ "Unable to create the interface to reach the SubCluster " + subClusterId , e );
243
241
}
244
-
245
242
clientRMProxies .put (subClusterId , clientRMProxy );
246
243
return clientRMProxy ;
247
244
}
248
245
249
246
private SubClusterId getRandomActiveSubCluster (
250
- Map <SubClusterId , SubClusterInfo > activeSubclusters )
251
- throws YarnException {
252
-
253
- if (activeSubclusters == null || activeSubclusters .size () < 1 ) {
247
+ Map <SubClusterId , SubClusterInfo > activeSubClusters ) throws YarnException {
248
+ if (activeSubClusters == null || activeSubClusters .size () < 1 ) {
254
249
RouterServerUtil .logAndThrowException (
255
250
FederationPolicyUtils .NO_ACTIVE_SUBCLUSTER_AVAILABLE , null );
256
251
}
257
- List <SubClusterId > list = new ArrayList <>(activeSubclusters .keySet ());
258
-
252
+ List <SubClusterId > list = new ArrayList <>(activeSubClusters .keySet ());
259
253
return list .get (rand .nextInt (list .size ()));
260
254
}
261
255
@@ -280,47 +274,50 @@ private SubClusterId getRandomActiveSubCluster(
280
274
public GetNewApplicationResponse getNewApplication (
281
275
GetNewApplicationRequest request ) throws YarnException , IOException {
282
276
283
- long startTime = clock .getTime ();
277
+ if (request == null ) {
278
+ routerMetrics .incrAppsFailedCreated ();
279
+ String errMsg = "Missing getNewApplication request." ;
280
+ RouterAuditLogger .logFailure (user .getShortUserName (),
281
+ RouterAuditLogger .AuditConstants .GET_NEW_APP , "UNKNOWN" ,
282
+ "RouterClientRMService" , errMsg );
283
+ RouterServerUtil .logAndThrowException (errMsg , null );
284
+ }
284
285
286
+ long startTime = clock .getTime ();
285
287
Map <SubClusterId , SubClusterInfo > subClustersActive =
286
288
federationFacade .getSubClusters (true );
287
289
290
+ GetNewApplicationResponse response = null ;
291
+
288
292
for (int i = 0 ; i < numSubmitRetries ; ++i ) {
289
293
SubClusterId subClusterId = getRandomActiveSubCluster (subClustersActive );
290
- LOG .debug (
291
- "getNewApplication try #{} on SubCluster {}" , i , subClusterId );
292
- ApplicationClientProtocol clientRMProxy =
293
- getClientRMProxyForSubCluster (subClusterId );
294
- GetNewApplicationResponse response = null ;
294
+ LOG .info ("getNewApplication try #{} on SubCluster {}" , i , subClusterId );
295
+ ApplicationClientProtocol clientRMProxy = getClientRMProxyForSubCluster (subClusterId );
296
+ response = null ;
295
297
try {
296
298
response = clientRMProxy .getNewApplication (request );
297
299
} catch (Exception e ) {
298
- LOG .warn ("Unable to create a new ApplicationId in SubCluster "
299
- + subClusterId . getId (), e );
300
+ LOG .warn ("Unable to create a new ApplicationId in SubCluster {}." , subClusterId . getId (), e );
301
+ subClustersActive . remove ( subClusterId );
300
302
}
301
303
302
304
if (response != null ) {
303
-
304
305
long stopTime = clock .getTime ();
305
306
routerMetrics .succeededAppsCreated (stopTime - startTime );
306
307
RouterAuditLogger .logSuccess (user .getShortUserName (),
307
308
RouterAuditLogger .AuditConstants .GET_NEW_APP ,
308
309
"RouterClientRMService" , response .getApplicationId ());
309
310
return response ;
310
- } else {
311
- // Empty response from the ResourceManager.
312
- // Blacklist this subcluster for this request.
313
- subClustersActive .remove (subClusterId );
314
311
}
315
-
316
312
}
317
313
318
314
routerMetrics .incrAppsFailedCreated ();
319
- String errMsg = "Fail to create a new application." ;
315
+ String errMsg = "Failed to create a new application." ;
320
316
RouterAuditLogger .logFailure (user .getShortUserName (),
321
317
RouterAuditLogger .AuditConstants .GET_NEW_APP , "UNKNOWN" ,
322
318
"RouterClientRMService" , errMsg );
323
- throw new YarnException (errMsg );
319
+ RouterServerUtil .logAndThrowException (errMsg , null );
320
+ return response ;
324
321
}
325
322
326
323
/**
@@ -392,32 +389,31 @@ public GetNewApplicationResponse getNewApplication(
392
389
public SubmitApplicationResponse submitApplication (
393
390
SubmitApplicationRequest request ) throws YarnException , IOException {
394
391
395
- long startTime = clock .getTime ();
396
-
397
392
if (request == null || request .getApplicationSubmissionContext () == null
398
- || request .getApplicationSubmissionContext ()
399
- .getApplicationId () == null ) {
393
+ || request .getApplicationSubmissionContext ().getApplicationId () == null ) {
400
394
routerMetrics .incrAppsFailedSubmitted ();
401
395
String errMsg =
402
- "Missing submitApplication request or applicationSubmissionContext "
403
- + "information." ;
396
+ "Missing submitApplication request or applicationSubmissionContext information." ;
404
397
RouterAuditLogger .logFailure (user .getShortUserName (),
405
398
RouterAuditLogger .AuditConstants .SUBMIT_NEW_APP , "UNKNOWN" ,
406
399
"RouterClientRMService" , errMsg );
407
- throw new YarnException (errMsg );
400
+ RouterServerUtil . logAndThrowException (errMsg , null );
408
401
}
409
402
403
+ long startTime = clock .getTime ();
404
+
410
405
ApplicationId applicationId =
411
406
request .getApplicationSubmissionContext ().getApplicationId ();
412
407
413
- List <SubClusterId > blacklist = new ArrayList <SubClusterId >();
408
+ List <SubClusterId > blacklist = new ArrayList <>();
414
409
415
410
for (int i = 0 ; i < numSubmitRetries ; ++i ) {
416
411
417
412
SubClusterId subClusterId = policyFacade .getHomeSubcluster (
418
413
request .getApplicationSubmissionContext (), blacklist );
419
- LOG .info ("submitApplication appId {} try #{} on SubCluster {}." , applicationId , i ,
420
- subClusterId );
414
+
415
+ LOG .info ("submitApplication appId {} try #{} on SubCluster {}." ,
416
+ applicationId , i , subClusterId );
421
417
422
418
ApplicationHomeSubCluster appHomeSubCluster =
423
419
ApplicationHomeSubCluster .newInstance (applicationId , subClusterId );
@@ -430,32 +426,34 @@ public SubmitApplicationResponse submitApplication(
430
426
federationFacade .addApplicationHomeSubCluster (appHomeSubCluster );
431
427
} catch (YarnException e ) {
432
428
routerMetrics .incrAppsFailedSubmitted ();
433
- String message = "Unable to insert the ApplicationId " + applicationId
434
- + " into the FederationStateStore" ;
429
+ String message =
430
+ String .format ("Unable to insert the ApplicationId %s into the FederationStateStore." ,
431
+ applicationId );
435
432
RouterAuditLogger .logFailure (user .getShortUserName (),
436
433
RouterAuditLogger .AuditConstants .SUBMIT_NEW_APP , "UNKNOWN" ,
437
434
"RouterClientRMService" , message , applicationId , subClusterId );
438
- throw new YarnException (message , e );
435
+ RouterServerUtil . logAndThrowException (message , e );
439
436
}
440
437
} else {
441
438
try {
442
439
// update the mapping of applicationId and the home subClusterId to
443
440
// the new subClusterId we have selected
444
441
federationFacade .updateApplicationHomeSubCluster (appHomeSubCluster );
445
442
} catch (YarnException e ) {
446
- String message = "Unable to update the ApplicationId " + applicationId
447
- + " into the FederationStateStore" ;
443
+ String message =
444
+ String .format ("Unable to update the ApplicationId %s into the FederationStateStore." ,
445
+ applicationId );
448
446
SubClusterId subClusterIdInStateStore =
449
447
federationFacade .getApplicationHomeSubCluster (applicationId );
450
448
if (subClusterId == subClusterIdInStateStore ) {
451
- LOG .info ("Application {} already submitted on SubCluster {}." , applicationId ,
452
- subClusterId );
449
+ LOG .info ("Application {} already submitted on SubCluster {}." ,
450
+ applicationId , subClusterId );
453
451
} else {
454
452
routerMetrics .incrAppsFailedSubmitted ();
455
453
RouterAuditLogger .logFailure (user .getShortUserName (),
456
454
RouterAuditLogger .AuditConstants .SUBMIT_NEW_APP , "UNKNOWN" ,
457
455
"RouterClientRMService" , message , applicationId , subClusterId );
458
- throw new YarnException (message , e );
456
+ RouterServerUtil . logAndThrowException (message , e );
459
457
}
460
458
}
461
459
}
@@ -489,9 +487,8 @@ public SubmitApplicationResponse submitApplication(
489
487
}
490
488
491
489
routerMetrics .incrAppsFailedSubmitted ();
492
- String errMsg = "Application "
493
- + request .getApplicationSubmissionContext ().getApplicationName ()
494
- + " with appId " + applicationId + " failed to be submitted." ;
490
+ String errMsg = String .format ("Application %s with appId %s failed to be submitted." ,
491
+ request .getApplicationSubmissionContext ().getApplicationName (), applicationId );
495
492
RouterAuditLogger .logFailure (user .getShortUserName (),
496
493
RouterAuditLogger .AuditConstants .SUBMIT_NEW_APP , "UNKNOWN" ,
497
494
"RouterClientRMService" , errMsg , applicationId );
0 commit comments