Skip to content

Commit addad56

Browse files
committed
YARN-11200 Backport numa to branch-2.10
1 parent 5d96646 commit addad56

File tree

11 files changed

+1341
-1
lines changed

11 files changed

+1341
-1
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3524,6 +3524,22 @@ public static boolean areNodeLabelsEnabled(
35243524
public static final String DEFAULT_YARN_WORKFLOW_ID_TAG_PREFIX =
35253525
"workflowid:";
35263526

3527+
/**
3528+
* Settings for NUMA awareness.
3529+
*/
3530+
// Key of the switch that enables NUMA awareness for containers in the
// NodeManager (documented in yarn-default.xml as
// yarn.nodemanager.numa-awareness.enabled).
public static final String NM_NUMA_AWARENESS_ENABLED = NM_PREFIX
3531+
+ "numa-awareness.enabled";
3532+
// NUMA awareness is off unless it is explicitly enabled.
public static final boolean DEFAULT_NM_NUMA_AWARENESS_ENABLED = false;
3533+
// Key selecting where the NUMA topology comes from: per yarn-default.xml,
// true means the NM reads it from the system via 'numactl --hardware',
// false means it is read from the node-ids/memory/cpus configurations.
public static final String NM_NUMA_AWARENESS_READ_TOPOLOGY = NM_PREFIX
3534+
+ "numa-awareness.read-topology";
3535+
// By default the topology is taken from the configurations, not the system.
public static final boolean DEFAULT_NM_NUMA_AWARENESS_READ_TOPOLOGY = false;
3536+
// Key of the comma separated list of NUMA node ids; per yarn-default.xml it
// is only consulted when read-topology is false. No default constant is
// declared for it here.
public static final String NM_NUMA_AWARENESS_NODE_IDS = NM_PREFIX
3537+
+ "numa-awareness.node-ids";
3538+
// Key of the path to the numactl command.
public static final String NM_NUMA_AWARENESS_NUMACTL_CMD = NM_PREFIX
3539+
+ "numa-awareness.numactl.cmd";
3540+
// Path used for numactl when the key above is not configured.
public static final String DEFAULT_NM_NUMA_AWARENESS_NUMACTL_CMD =
3541+
"/usr/bin/numactl";
3542+
35273543
public YarnConfiguration() {
35283544
super();
35293545
}
@@ -3535,6 +3551,17 @@ public YarnConfiguration(Configuration conf) {
35353551
}
35363552
}
35373553

3554+
/**
3555+
* Returns whether the NUMA awareness is enabled.
3556+
*
3557+
* @param conf the configuration
3558+
* @return whether the NUMA awareness is enabled.
3559+
*/
3560+
public static boolean numaAwarenessEnabled(Configuration conf) {
3561+
return conf.getBoolean(NM_NUMA_AWARENESS_ENABLED,
3562+
DEFAULT_NM_NUMA_AWARENESS_ENABLED);
3563+
}
3564+
35383565
@Private
35393566
public static List<String> getServiceAddressConfKeys(Configuration conf) {
35403567
return useHttps(conf) ? RM_SERVICES_ADDRESS_CONF_KEYS_HTTPS

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3680,4 +3680,56 @@
36803680
<name>yarn.client.load.resource-types.from-server</name>
36813681
<value>false</value>
36823682
</property>
3683+
3684+
<property>
3685+
<description>
3686+
Whether to enable the NUMA awareness for containers in Node Manager.
3687+
</description>
3688+
<name>yarn.nodemanager.numa-awareness.enabled</name>
3689+
<value>false</value>
3690+
</property>
3691+
3692+
<property>
3693+
<description>
3694+
Whether to read the NUMA topology from the system or from the
3695+
configurations. If the value is true then NM reads the NUMA topology from
3696+
system using the command 'numactl --hardware'. If the value is false then NM
3697+
reads the topology from the configurations
3698+
'yarn.nodemanager.numa-awareness.node-ids'(for node id's),
3699+
'yarn.nodemanager.numa-awareness.&lt;NODE_ID&gt;.memory'(for each node memory),
3700+
'yarn.nodemanager.numa-awareness.&lt;NODE_ID&gt;.cpus'(for each node cpus).
3701+
</description>
3702+
<name>yarn.nodemanager.numa-awareness.read-topology</name>
3703+
<value>false</value>
3704+
</property>
3705+
3706+
<property>
3707+
<description>
3708+
NUMA node IDs as a comma-separated list. Memory and number of CPUs
3709+
will be read using the properties
3710+
'yarn.nodemanager.numa-awareness.&lt;NODE_ID&gt;.memory' and
3711+
'yarn.nodemanager.numa-awareness.&lt;NODE_ID&gt;.cpus' for each id specified
3712+
in this value. This property value will be read only when
3713+
'yarn.nodemanager.numa-awareness.read-topology=false'.
3714+
3715+
For example, if yarn.nodemanager.numa-awareness.node-ids=0,1
3716+
then memory and cpus must be specified for node IDs '0' and '1' as shown below,
3717+
yarn.nodemanager.numa-awareness.0.memory=73717
3718+
yarn.nodemanager.numa-awareness.0.cpus=4
3719+
yarn.nodemanager.numa-awareness.1.memory=73727
3720+
yarn.nodemanager.numa-awareness.1.cpus=4
3721+
</description>
3722+
<name>yarn.nodemanager.numa-awareness.node-ids</name>
3723+
<value></value>
3724+
</property>
3725+
3726+
<property>
3727+
<description>
3728+
The numactl command path which controls NUMA policy for processes or
3729+
shared memory.
3730+
</description>
3731+
<name>yarn.nodemanager.numa-awareness.numactl.cmd</name>
3732+
<value>/usr/bin/numactl</value>
3733+
</property>
3734+
36833735
</configuration>

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
111111
private boolean containerLimitUsers;
112112
private ResourceHandler resourceHandlerChain;
113113
private LinuxContainerRuntime linuxContainerRuntime;
114+
private Context nmContext;
114115

115116
/**
116117
* The container exit code.
@@ -262,6 +263,13 @@ protected String getContainerExecutorExecutablePath(Configuration conf) {
262263
defaultPath);
263264
}
264265

266+
private void addNumaArgsToCommand(List<String> prefixCommands,
267+
List<String> numaArgs) {
268+
if (numaArgs != null) {
269+
prefixCommands.addAll(numaArgs);
270+
}
271+
}
272+
265273
/**
266274
* Add a niceness level to the process that will be executed. Adds
267275
* {@code -n <nice>} to the given command. The niceness level will be
@@ -283,6 +291,7 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
283291

284292
@Override
285293
public void init(Context nmContext) throws IOException {
294+
this.nmContext = nmContext;
286295
Configuration conf = super.getConf();
287296

288297
// Send command to executor which will just start up,
@@ -475,6 +484,7 @@ public int launchContainer(ContainerStartContext ctx)
475484
container.getResource());
476485
String resourcesOptions = resourcesHandler.getResourcesOption(containerId);
477486
String tcCommandFile = null;
487+
List<String> numaArgs = null;
478488

479489
try {
480490
if (resourceHandlerChain != null) {
@@ -496,6 +506,9 @@ public int launchContainer(ContainerStartContext ctx)
496506
case TC_MODIFY_STATE:
497507
tcCommandFile = op.getArguments().get(0);
498508
break;
509+
case ADD_NUMA_PARAMS:
510+
numaArgs = op.getArguments();
511+
break;
499512
default:
500513
LOG.warn("PrivilegedOperation type unsupported in launch: "
501514
+ op.getOperationType());
@@ -529,6 +542,7 @@ public int launchContainer(ContainerStartContext ctx)
529542
.Builder(container);
530543

531544
addSchedPriorityCommand(prefixCommands);
545+
addNumaArgsToCommand(prefixCommands, numaArgs);
532546
if (prefixCommands.size() > 0) {
533547
builder.setExecutionAttribute(CONTAINER_LAUNCH_PREFIX_COMMANDS,
534548
prefixCommands);

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperation.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ public enum OperationType {
5252
ADD_PID_TO_CGROUP(""), //no CLI switch supported yet.
5353
RUN_DOCKER_CMD("--run-docker"),
5454
GPU("--module-gpu"),
55-
LIST_AS_USER(""); //no CLI switch supported yet.
55+
LIST_AS_USER(""), //no CLI switch supported yet.
56+
ADD_NUMA_PARAMS(""); // no CLI switch supported yet.
5657

5758
private final String option;
5859

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.hadoop.yarn.conf.YarnConfiguration;
2828
import org.apache.hadoop.yarn.server.nodemanager.Context;
2929
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
30+
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.numa.NumaResourceHandlerImpl;
3031
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
3132
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager;
3233
import org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler;
@@ -208,6 +209,14 @@ private static void addHandlerIfNotNull(List<ResourceHandler> handlerList,
208209
}
209210
}
210211

212+
private static ResourceHandler getNumaResourceHandler(Configuration conf,
213+
Context nmContext) {
214+
if (YarnConfiguration.numaAwarenessEnabled(conf)) {
215+
return new NumaResourceHandlerImpl(conf, nmContext);
216+
}
217+
return null;
218+
}
219+
211220
private static void initializeConfiguredResourceHandlerChain(
212221
Configuration conf, Context nmContext)
213222
throws ResourceHandlerException {
@@ -218,6 +227,7 @@ private static void initializeConfiguredResourceHandlerChain(
218227
addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
219228
addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
220229
addHandlersFromConfiguredResourcePlugins(handlerList, conf, nmContext);
230+
addHandlerIfNotNull(handlerList, getNumaResourceHandler(conf, nmContext));
221231
resourceHandlerChain = new ResourceHandlerChain(handlerList);
222232
}
223233

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
2+
/**
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
*/
19+
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.numa;
20+
21+
import java.util.Map;
22+
import java.util.concurrent.ConcurrentHashMap;
23+
24+
import org.slf4j.Logger;
25+
import org.slf4j.LoggerFactory;
26+
import org.apache.hadoop.yarn.api.records.ContainerId;
27+
import org.apache.hadoop.yarn.api.records.Resource;
28+
29+
/**
30+
* NumaNodeResource class holds the NUMA node topology with the total and used
31+
* resources.
32+
*/
33+
public class NumaNodeResource {
34+
private String nodeId;
35+
private long totalMemory;
36+
private int totalCpus;
37+
private long usedMemory;
38+
private int usedCpus;
39+
40+
private static final Logger LOG = LoggerFactory.
41+
getLogger(NumaNodeResource.class);
42+
43+
private Map<ContainerId, Long> containerVsMemUsage =
44+
new ConcurrentHashMap<>();
45+
private Map<ContainerId, Integer> containerVsCpusUsage =
46+
new ConcurrentHashMap<>();
47+
48+
public NumaNodeResource(String nodeId, long totalMemory, int totalCpus) {
49+
this.nodeId = nodeId;
50+
this.totalMemory = totalMemory;
51+
this.totalCpus = totalCpus;
52+
}
53+
54+
/**
55+
* Checks whether the specified resources available or not.
56+
*
57+
* @param resource resource
58+
* @return whether the specified resources available or not
59+
*/
60+
public boolean isResourcesAvailable(Resource resource) {
61+
LOG.debug(
62+
"Memory available:" + (totalMemory - usedMemory) + ", CPUs available:"
63+
+ (totalCpus - usedCpus) + ", requested:" + resource);
64+
if ((totalMemory - usedMemory) >= resource.getMemorySize()
65+
&& (totalCpus - usedCpus) >= resource.getVirtualCores()) {
66+
return true;
67+
}
68+
return false;
69+
}
70+
71+
/**
72+
* Assigns available memory and returns the remaining needed memory.
73+
*
74+
* @param memreq required memory
75+
* @param containerId which container memory to assign
76+
* @return remaining needed memory
77+
*/
78+
public long assignAvailableMemory(long memreq, ContainerId containerId) {
79+
long memAvailable = totalMemory - usedMemory;
80+
if (memAvailable >= memreq) {
81+
containerVsMemUsage.put(containerId, memreq);
82+
usedMemory += memreq;
83+
return 0;
84+
} else {
85+
usedMemory += memAvailable;
86+
containerVsMemUsage.put(containerId, memAvailable);
87+
return memreq - memAvailable;
88+
}
89+
}
90+
91+
/**
92+
* Assigns available cpu's and returns the remaining needed cpu's.
93+
*
94+
* @param cpusreq required cpu's
95+
* @param containerId which container cpu's to assign
96+
* @return remaining needed cpu's
97+
*/
98+
public int assignAvailableCpus(int cpusreq, ContainerId containerId) {
99+
int cpusAvailable = totalCpus - usedCpus;
100+
if (cpusAvailable >= cpusreq) {
101+
containerVsCpusUsage.put(containerId, cpusreq);
102+
usedCpus += cpusreq;
103+
return 0;
104+
} else {
105+
usedCpus += cpusAvailable;
106+
containerVsCpusUsage.put(containerId, cpusAvailable);
107+
return cpusreq - cpusAvailable;
108+
}
109+
}
110+
111+
/**
112+
* Assigns the requested resources for Container.
113+
*
114+
* @param resource resource to assign
115+
* @param containerId to which container the resources to assign
116+
*/
117+
public void assignResources(Resource resource, ContainerId containerId) {
118+
containerVsMemUsage.put(containerId, resource.getMemorySize());
119+
containerVsCpusUsage.put(containerId, resource.getVirtualCores());
120+
usedMemory += resource.getMemorySize();
121+
usedCpus += resource.getVirtualCores();
122+
}
123+
124+
/**
125+
* Releases the assigned resources for Container.
126+
*
127+
* @param containerId to which container the assigned resources to release
128+
*/
129+
public void releaseResources(ContainerId containerId) {
130+
if (containerVsMemUsage.containsKey(containerId)) {
131+
usedMemory -= containerVsMemUsage.get(containerId);
132+
containerVsMemUsage.remove(containerId);
133+
}
134+
if (containerVsCpusUsage.containsKey(containerId)) {
135+
usedCpus -= containerVsCpusUsage.get(containerId);
136+
containerVsCpusUsage.remove(containerId);
137+
}
138+
}
139+
140+
/**
141+
* Recovers the memory resources for Container.
142+
*
143+
* @param containerId recover the memory resources for the Container
144+
* @param memory memory to recover
145+
*/
146+
public void recoverMemory(ContainerId containerId, long memory) {
147+
containerVsMemUsage.put(containerId, memory);
148+
usedMemory += memory;
149+
}
150+
151+
/**
152+
* Recovers the cpu's resources for Container.
153+
*
154+
* @param containerId recover the cpu's resources for the Container
155+
* @param cpus cpu's to recover
156+
*/
157+
public void recoverCpus(ContainerId containerId, int cpus) {
158+
containerVsCpusUsage.put(containerId, cpus);
159+
usedCpus += cpus;
160+
}
161+
162+
@Override
163+
public String toString() {
164+
return "Node Id:" + nodeId + "\tMemory:" + totalMemory + "\tCPus:"
165+
+ totalCpus;
166+
}
167+
168+
@Override
169+
public int hashCode() {
170+
final int prime = 31;
171+
int result = 1;
172+
result = prime * result + ((nodeId == null) ? 0 : nodeId.hashCode());
173+
result = prime * result + (int) (totalMemory ^ (totalMemory >>> 32));
174+
result = prime * result + totalCpus;
175+
return result;
176+
}
177+
178+
@Override
179+
public boolean equals(Object obj) {
180+
if (this == obj) {
181+
return true;
182+
}
183+
if (obj == null || getClass() != obj.getClass()) {
184+
return false;
185+
}
186+
NumaNodeResource other = (NumaNodeResource) obj;
187+
if (nodeId == null) {
188+
if (other.nodeId != null) {
189+
return false;
190+
}
191+
} else if (!nodeId.equals(other.nodeId)) {
192+
return false;
193+
}
194+
if (totalMemory != other.totalMemory) {
195+
return false;
196+
}
197+
return totalCpus == other.totalCpus;
198+
}
199+
200+
public String getNodeId() {
201+
return nodeId;
202+
}
203+
}

0 commit comments

Comments
 (0)