Skip to content

Commit df35c7f

Browse files
YARN-10526. RMAppManager CS Placement ignores parent path. Contributed by Gergely Pollak
1 parent 5bf977e commit df35c7f

File tree

4 files changed

+114
-7
lines changed

4 files changed

+114
-7
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,11 +500,25 @@ private RMAppImpl createAndPopulateNewRMApp(
500500
}
501501
}
502502

503+
//In the case of capacity scheduler the queue name only means the name of
504+
// the leaf queue, but since YARN-9879, internal queue references should
505+
// use full path, so we get the queue and parent name from the placement
506+
// context instead of the submissionContext.
507+
String placementQueueName = submissionContext.getQueue();
508+
if (placementContext != null && scheduler instanceof CapacityScheduler) {
509+
if (placementContext.hasParentQueue()) {
510+
placementQueueName = placementContext.getParentQueue() + "." +
511+
placementContext.getQueue();
512+
} else {
513+
placementQueueName = placementContext.getQueue();
514+
}
515+
}
516+
503517
// Create RMApp
504518
RMAppImpl application =
505519
new RMAppImpl(applicationId, rmContext, this.conf,
506520
submissionContext.getApplicationName(), user,
507-
submissionContext.getQueue(),
521+
placementQueueName,
508522
submissionContext, this.scheduler, this.masterService,
509523
submitTime, submissionContext.getApplicationType(),
510524
submissionContext.getApplicationTags(), amReqs, placementContext,

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1835,6 +1835,40 @@ CSAssignment allocateContainersToNode(
18351835
return assignment;
18361836
}
18371837

1838+
/**
1839+
* Extracts the actual queue name from an app add event.
1840+
* Currently, ApplicationPlacementContext and
1841+
* ApplicationSubmissionContext are used inconsistently; this method
1842+
* helps to get the proper placement path for the queue if a placement context
1843+
* is provided.
1844+
* @param appAddedEvent The application add event with details about the app
1845+
* @return The name of the queue the application should be added to
1846+
*/
1847+
private String getAddedAppQueueName(AppAddedSchedulerEvent appAddedEvent) {
1848+
//appAddedEvent uses the queue from ApplicationSubmissionContext but in
1849+
//the case of CS it may be only a leaf name due to legacy reasons
1850+
String ret = appAddedEvent.getQueue();
1851+
ApplicationPlacementContext placementContext =
1852+
appAddedEvent.getPlacementContext();
1853+
1854+
//If we have a placement context, it means a mapping rule made a decision
1855+
//about the queue placement, so we use that data. It is supposed to be in
1856+
//sync with the ApplicationSubmissionContext and appAddedEvent.getQueue, but
1857+
//because of the aforementioned legacy reasons these two may only contain
1858+
//the leaf queue name.
1859+
if (placementContext != null) {
1860+
String leafName = placementContext.getQueue();
1861+
String parentName = placementContext.getParentQueue();
1862+
if (leafName != null) {
1863+
//build the queue path as parent.leaf, or use the leaf name alone when
//the placement context has no parent queue
1864+
ret = placementContext.hasParentQueue() ?
1865+
(parentName + "." + leafName) : leafName;
1866+
}
1867+
}
1868+
1869+
return ret;
1870+
}
1871+
18381872
@Override
18391873
public void handle(SchedulerEvent event) {
18401874
switch(event.getType()) {
@@ -1886,9 +1920,9 @@ public void handle(SchedulerEvent event) {
18861920
case APP_ADDED:
18871921
{
18881922
AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
1889-
String queueName = resolveReservationQueueName(appAddedEvent.getQueue(),
1890-
appAddedEvent.getApplicationId(), appAddedEvent.getReservationID(),
1891-
appAddedEvent.getIsAppRecovering());
1923+
String queueName = resolveReservationQueueName(
1924+
getAddedAppQueueName(appAddedEvent), appAddedEvent.getApplicationId(),
1925+
appAddedEvent.getReservationID(), appAddedEvent.getIsAppRecovering());
18921926
if (queueName != null) {
18931927
if (!appAddedEvent.getIsAppRecovering()) {
18941928
addApplication(appAddedEvent.getApplicationId(), queueName,

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoCreatedQueueBase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ public static CapacitySchedulerConfiguration setupQueueConfiguration(
420420
(C, NODEL_LABEL_SSD);
421421

422422

423-
LOG.info("Setup " + C + " as an auto leaf creation enabled parent queue");
423+
LOG.info("Setup " + D + " as an auto leaf creation enabled parent queue");
424424

425425
conf.setUserLimitFactor(D, 1.0f);
426426
conf.setAutoCreateChildQueueEnabled(D, true);

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoQueueCreation.java

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
4343
import org.apache.hadoop.yarn.server.resourcemanager.placement
4444
.ApplicationPlacementContext;
45+
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
4546
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
4647
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
4748
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
@@ -90,6 +91,7 @@
9091

9192
import static org.junit.Assert.assertEquals;
9293
import static org.junit.Assert.assertNotNull;
94+
import static org.junit.Assert.assertNull;
9395
import static org.junit.Assert.assertTrue;
9496
import static org.junit.Assert.fail;
9597
import static org.mockito.Mockito.mock;
@@ -501,16 +503,23 @@ public void testParentQueueUpdateInQueueMappingFailsAfterAutoCreation()
501503

502504
assertNotNull(newCS.getQueue(USER0));
503505

504-
setupQueueMapping(newCS, USER0, "d", USER0);
506+
//The new placement engine's validation is a bit more
507+
//strict so it would reject the original u:user_0:a.user_0 rule since
508+
//it checks if that path exists or is a managed parent, but if we use
509+
//a.%user we can trick the engine, since it cannot validate if the actual
510+
//value of the %user will exist or not, it allows the rule
511+
setupQueueMapping(newCS, USER0, "a", "%user");
505512
newCS.updatePlacementRules();
506513

507514
RMContext rmContext = mock(RMContext.class);
508515
when(rmContext.getDispatcher()).thenReturn(dispatcher);
509516
newCS.setRMContext(rmContext);
510517

511518
ApplicationId appId = BuilderUtils.newApplicationId(1, 1);
519+
//The new engine would return root.a as the parent queue for this
520+
// submission so creating the ApplicationPlacementContext accordingly
512521
SchedulerEvent addAppEvent = new AppAddedSchedulerEvent(appId, USER0,
513-
USER0, new ApplicationPlacementContext(USER0, "d"));
522+
USER0, new ApplicationPlacementContext(USER0, "root.a"));
514523
newCS.handle(addAppEvent);
515524

516525
RMAppEvent event = new RMAppEvent(appId, RMAppEventType.APP_REJECTED,
@@ -524,6 +533,56 @@ public void testParentQueueUpdateInQueueMappingFailsAfterAutoCreation()
524533
}
525534
}
526535

536+
/**
537+
* This test case checks if a mapping rule can put an application into an auto
538+
* created queue even if another queue with the same leaf name already
539+
* exists.
540+
*
541+
* In this scenario we use the following queues:
542+
* root.a.a1 - already existing queue
543+
* root.c - managed parent queue
544+
*
545+
* And the following mapping rule:
546+
* u:%user:root.c.%user - Any submission should go to root.c.USERNAME queue
547+
*
548+
* When user 'a1' submits a new application we expect it to go to 'root.c.a1'
549+
* because of the mapping rule, and the queue should be created.
550+
*
551+
* @throws Exception - if anything goes wrong, which should fail the test
552+
*/
553+
@Test
554+
public void testAutoQueueCreationWhenQueueExistsWithSameName()
555+
throws Exception {
556+
557+
MockRM newMockRM = setupSchedulerInstance();
558+
CapacityScheduler newCS =
559+
(CapacityScheduler) newMockRM.getResourceScheduler();
560+
561+
try {
562+
setupQueueMapping(newCS, "%user", "root.c", "%user");
563+
newCS.updatePlacementRules();
564+
565+
//making sure the target queue does not exist before submission
566+
assertNull(newCS.getQueue("root.c.a1"));
567+
RMApp app = MockRMAppSubmitter.submit(newMockRM,
568+
MockRMAppSubmissionData.Builder.createWithMemory(512, newMockRM)
569+
.withAppName("testAutoQueueCreationWhenQueueExistsWithSameName")
570+
.withUser("a1")
571+
.withQueue("default")
572+
.build());
573+
RMAppAttempt attempt = MockRM.waitForAttemptScheduled(app, newMockRM);
574+
//checking if the target queue has been created during the submission
575+
assertNotNull(newCS.getQueue("root.c.a1"));
576+
//making sure the application is indeed in the right queue
577+
assertEquals("root.c.a1", app.getQueue());
578+
} finally {
579+
if (newMockRM != null) {
580+
((CapacityScheduler) newMockRM.getResourceScheduler()).stop();
581+
newMockRM.stop();
582+
}
583+
}
584+
}
585+
527586
@Test
528587
public void testAutoCreationFailsWhenParentCapacityExceeded()
529588
throws Exception {

0 commit comments

Comments
 (0)