Skip to content

Commit 538bb48

Browse files
Szilard Nemeth authored and templedf committed
YARN-9323. FSLeafQueue#computeMaxAMResource does not override zero values for custom resources
(Contributed by Szilard Nemeth via Daniel Templeton) Change-Id: Id844ccf09488f367c0c7de0a3b2d4aca1bba31cc
1 parent 7b928f1 commit 538bb48

File tree

5 files changed

+172
-0
lines changed

5 files changed

+172
-0
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -832,4 +832,8 @@ public long getAggegatedReleasedContainers() {
832832
public long getAggregatePreemptedContainers() {
833833
return aggregateContainersPreempted.value();
834834
}
835+
836+
public QueueMetricsForCustomResources getQueueMetricsForCustomResources() {
837+
return queueMetricsForCustomResources;
838+
}
835839
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetricsForCustomResources.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,8 @@ Map<String, Long> getReservedValues() {
101101
QueueMetricsCustomResource getAggregatePreemptedSeconds() {
102102
return aggregatePreemptedSeconds;
103103
}
104+
105+
public QueueMetricsCustomResource getAvailable() {
106+
return available;
107+
}
104108
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.util.Collections;
2424
import java.util.HashSet;
2525
import java.util.List;
26+
import java.util.Map;
2627
import java.util.Set;
2728
import java.util.concurrent.locks.Lock;
2829
import java.util.concurrent.locks.ReadWriteLock;
@@ -42,6 +43,8 @@
4243
import org.apache.hadoop.yarn.api.records.Resource;
4344
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
4445
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
46+
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsCustomResource;
47+
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsForCustomResources;
4548
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils;
4649
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
4750
import org.apache.hadoop.yarn.util.resource.Resources;
@@ -517,6 +520,29 @@ private Resource computeMaxAMResource() {
517520
getMaxShare().getVirtualCores()));
518521
}
519522

523+
QueueMetricsForCustomResources metricsForCustomResources =
524+
scheduler.getRootQueueMetrics().getQueueMetricsForCustomResources();
525+
526+
if (metricsForCustomResources != null) {
527+
QueueMetricsCustomResource availableResources =
528+
metricsForCustomResources.getAvailable();
529+
530+
// We expect all custom resources to be contained in availableResources,
531+
// so we will loop through all of them.
532+
for (Map.Entry<String, Long> availableEntry : availableResources
533+
.getValues().entrySet()) {
534+
String resourceName = availableEntry.getKey();
535+
536+
// We only update the value if fairshare is 0 for that resource.
537+
if (maxResource.getResourceValue(resourceName) == 0) {
538+
Long availableValue = availableEntry.getValue();
539+
long value = Math.min(availableValue,
540+
getMaxShare().getResourceValue(resourceName));
541+
maxResource.setResourceValue(resourceName, value);
542+
}
543+
}
544+
}
545+
520546
// Round up to allow AM to run when there is only one vcore on the cluster
521547
return Resources.multiplyAndRoundUp(maxResource, maxAMShare);
522548
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,4 +310,8 @@ static FSQueueMetrics forQueue(MetricsSystem ms, String queueName,
310310

311311
return (FSQueueMetrics)metrics;
312312
}
313+
314+
FSQueueMetricsForCustomResources getCustomResources() {
315+
return customResources;
316+
}
313317
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import java.util.concurrent.ExecutorService;
3535
import java.util.concurrent.TimeUnit;
3636

37+
import com.google.common.collect.ImmutableMap;
3738
import org.apache.hadoop.util.concurrent.HadoopExecutors;
3839
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
3940
import org.apache.hadoop.yarn.api.records.Resource;
@@ -42,19 +43,26 @@
4243
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
4344
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
4445
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
46+
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsCustomResource;
4547
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
4648
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
4749
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
50+
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
4851
import org.apache.hadoop.yarn.util.resource.Resources;
4952
import org.junit.After;
5053
import org.junit.Before;
5154
import org.junit.Test;
5255
import org.mockito.Mockito;
56+
import java.util.Map;
57+
import static org.junit.Assert.assertFalse;
58+
import static org.junit.Assert.assertNotNull;
5359

5460
public class TestFSLeafQueue extends FairSchedulerTestBase {
5561
private final static String ALLOC_FILE = new File(TEST_DIR,
5662
TestFSLeafQueue.class.getName() + ".xml").getAbsolutePath();
5763
private Resource maxResource = Resources.createResource(1024 * 8);
64+
private static final float MAX_AM_SHARE = 0.5f;
65+
private static final String CUSTOM_RESOURCE = "test1";
5866

5967
@Before
6068
public void setup() throws IOException {
@@ -105,6 +113,8 @@ public void test() throws Exception {
105113
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
106114
out.println("<?xml version=\"1.0\"?>");
107115
out.println("<allocations>");
116+
out.println("<queueMaxAMShareDefault>" + MAX_AM_SHARE +
117+
"</queueMaxAMShareDefault>");
108118
out.println("<queue name=\"queueA\"></queue>");
109119
out.println("<queue name=\"queueB\"></queue>");
110120
out.println("</allocations>");
@@ -221,4 +231,128 @@ public void run() {
221231
assertTrue("Test failed with exception(s)" + exceptions,
222232
exceptions.isEmpty());
223233
}
234+
235+
@Test
236+
public void testCanRunAppAMReturnsTrue() {
237+
conf.set(YarnConfiguration.RESOURCE_TYPES, CUSTOM_RESOURCE);
238+
ResourceUtils.resetResourceTypes(conf);
239+
240+
resourceManager = new MockRM(conf);
241+
resourceManager.start();
242+
scheduler = (FairScheduler) resourceManager.getResourceScheduler();
243+
244+
Resource maxShare = Resource.newInstance(1024 * 8, 4,
245+
ImmutableMap.of(CUSTOM_RESOURCE, 10L));
246+
247+
// Add a node to increase available memory and vcores in scheduler's
248+
// root queue metrics
249+
addNodeToScheduler(Resource.newInstance(4096, 10,
250+
ImmutableMap.of(CUSTOM_RESOURCE, 25L)));
251+
252+
FSLeafQueue queue = setupQueue(maxShare);
253+
254+
//Min(availableMemory, maxShareMemory (maxResourceOverridden))
255+
// --> Min(4096, 8192) = 4096
256+
//Min(availableVCores, maxShareVCores (maxResourceOverridden))
257+
// --> Min(10, 4) = 4
258+
//Min(available test1, maxShare test1 (maxResourceOverridden))
259+
// --> Min(25, 10) = 10
260+
//MaxAMResource: (4096 MB memory, 4 vcores, 10 test1) * MAX_AM_SHARE
261+
// --> 2048 MB memory, 2 vcores, 5 test1
262+
Resource expectedAMShare = Resource.newInstance(2048, 2,
263+
ImmutableMap.of(CUSTOM_RESOURCE, 5L));
264+
265+
Resource appAMResource = Resource.newInstance(2048, 2,
266+
ImmutableMap.of(CUSTOM_RESOURCE, 3L));
267+
268+
Map<String, Long> customResourceValues =
269+
verifyQueueMetricsForCustomResources(queue);
270+
271+
boolean result = queue.canRunAppAM(appAMResource);
272+
assertTrue("AM should have been allocated!", result);
273+
274+
verifyAMShare(queue, expectedAMShare, customResourceValues);
275+
}
276+
277+
private FSLeafQueue setupQueue(Resource maxShare) {
278+
String queueName = "root.queue1";
279+
FSLeafQueue schedulable = new FSLeafQueue(queueName, scheduler, null);
280+
schedulable.setMaxShare(new ConfigurableResource(maxShare));
281+
schedulable.setMaxAMShare(MAX_AM_SHARE);
282+
return schedulable;
283+
}
284+
285+
@Test
286+
public void testCanRunAppAMReturnsFalse() {
287+
conf.set(YarnConfiguration.RESOURCE_TYPES, CUSTOM_RESOURCE);
288+
ResourceUtils.resetResourceTypes(conf);
289+
290+
resourceManager = new MockRM(conf);
291+
resourceManager.start();
292+
scheduler = (FairScheduler) resourceManager.getResourceScheduler();
293+
294+
Resource maxShare = Resource.newInstance(1024 * 8, 4,
295+
ImmutableMap.of(CUSTOM_RESOURCE, 10L));
296+
297+
// Add a node to increase available memory and vcores in scheduler's
298+
// root queue metrics
299+
addNodeToScheduler(Resource.newInstance(4096, 10,
300+
ImmutableMap.of(CUSTOM_RESOURCE, 25L)));
301+
302+
FSLeafQueue queue = setupQueue(maxShare);
303+
304+
//Min(availableMemory, maxShareMemory (maxResourceOverridden))
305+
// --> Min(4096, 8192) = 4096
306+
//Min(availableVCores, maxShareVCores (maxResourceOverridden))
307+
// --> Min(10, 4) = 4
308+
//Min(available test1, maxShare test1 (maxResourceOverridden))
309+
// --> Min(25, 10) = 10
310+
//MaxAMResource: (4096 MB memory, 4 vcores, 10 test1) * MAX_AM_SHARE
311+
// --> 2048 MB memory, 2 vcores, 5 test1
312+
Resource expectedAMShare = Resource.newInstance(2048, 2,
313+
ImmutableMap.of(CUSTOM_RESOURCE, 5L));
314+
315+
Resource appAMResource = Resource.newInstance(2048, 2,
316+
ImmutableMap.of(CUSTOM_RESOURCE, 6L));
317+
318+
Map<String, Long> customResourceValues =
319+
verifyQueueMetricsForCustomResources(queue);
320+
321+
boolean result = queue.canRunAppAM(appAMResource);
322+
assertFalse("AM should not have been allocated!", result);
323+
324+
verifyAMShare(queue, expectedAMShare, customResourceValues);
325+
}
326+
327+
private void addNodeToScheduler(Resource node1Resource) {
328+
RMNode node1 = MockNodes.newNodeInfo(0, node1Resource, 1, "127.0.0.2");
329+
scheduler.handle(new NodeAddedSchedulerEvent(node1));
330+
}
331+
332+
private void verifyAMShare(FSLeafQueue schedulable,
333+
Resource expectedAMShare, Map<String, Long> customResourceValues) {
334+
Resource actualAMShare = Resource.newInstance(
335+
schedulable.getMetrics().getMaxAMShareMB(),
336+
schedulable.getMetrics().getMaxAMShareVCores(), customResourceValues);
337+
long customResourceValue =
338+
actualAMShare.getResourceValue(CUSTOM_RESOURCE);
339+
340+
//make sure to verify custom resource value explicitly!
341+
assertEquals(5L, customResourceValue);
342+
assertEquals("AM share is not the expected!", expectedAMShare,
343+
actualAMShare);
344+
}
345+
346+
private Map<String, Long> verifyQueueMetricsForCustomResources(
347+
FSLeafQueue schedulable) {
348+
QueueMetricsCustomResource maxAMShareCustomResources =
349+
schedulable.getMetrics().getCustomResources().getMaxAMShare();
350+
Map<String, Long> customResourceValues = maxAMShareCustomResources
351+
.getValues();
352+
assertNotNull("Queue metrics for custom resources should not be null!",
353+
maxAMShareCustomResources);
354+
assertNotNull("Queue metrics for custom resources resource values " +
355+
"should not be null!", customResourceValues);
356+
return customResourceValues;
357+
}
224358
}

0 commit comments

Comments
 (0)