Skip to content

Commit 19e418c

Browse files
committed
YARN-10713. ClusterMetrics should support custom resource capacity related metrics. Contributed by Qi Zhu.
1 parent af1f9f4 commit 19e418c

File tree

2 files changed

38 additions & 18 deletions (lines changed)
  • hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src

2 files changed

38 additions & 18 deletions (lines changed)

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java

Lines changed: 34 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020

2121
import static org.apache.hadoop.metrics2.lib.Interns.info;
2222

23+
import java.util.Map;
2324
import java.util.concurrent.atomic.AtomicBoolean;
2425

2526
import org.apache.hadoop.classification.InterfaceAudience;
@@ -35,6 +36,9 @@
3536
import org.apache.hadoop.yarn.api.records.Resource;
3637
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
3738
import org.apache.hadoop.yarn.api.records.ResourceInformation;
39+
import org.apache.hadoop.yarn.metrics.CustomResourceMetricValue;
40+
import org.apache.hadoop.yarn.metrics.CustomResourceMetrics;
41+
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsForCustomResources;
3842
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
3943

4044
@InterfaceAudience.Private
@@ -58,10 +62,19 @@ public class ClusterMetrics {
5862
@Metric("Vcore Utilization") MutableGaugeLong utilizedVirtualCores;
5963
@Metric("Memory Capability") MutableGaugeLong capabilityMB;
6064
@Metric("Vcore Capability") MutableGaugeLong capabilityVirtualCores;
61-
@Metric("GPU Capability") MutableGaugeLong capabilityGPUs;
6265

6366
private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
6467
"Metrics for the Yarn Cluster");
68+
69+
private static final String CUSTOM_RESOURCE_CAPABILITY_METRIC_PREFIX =
70+
"Capability.";
71+
private static final String CUSTOM_RESOURCE_CAPABILITY_METRIC_DESC =
72+
"NAME Capability";
73+
74+
private static CustomResourceMetrics customResourceMetrics;
75+
76+
private final CustomResourceMetricValue customResourceCapability =
77+
new CustomResourceMetricValue();
6578

6679
private static volatile ClusterMetrics INSTANCE = null;
6780
private static MetricsRegistry registry;
@@ -86,6 +99,17 @@ private static void registerMetrics() {
8699
if (ms != null) {
87100
ms.register("ClusterMetrics", "Metrics for the Yarn Cluster", INSTANCE);
88101
}
102+
103+
if (ResourceUtils.getNumberOfKnownResourceTypes() > 2) {
104+
customResourceMetrics =
105+
new CustomResourceMetrics();
106+
Map<String, Long> customResources =
107+
customResourceMetrics.initAndGetCustomResources();
108+
customResourceMetrics.
109+
registerCustomResources(customResources,
110+
registry, CUSTOM_RESOURCE_CAPABILITY_METRIC_PREFIX,
111+
CUSTOM_RESOURCE_CAPABILITY_METRIC_DESC);
112+
}
89113
}
90114

91115
@VisibleForTesting
@@ -209,23 +233,20 @@ public long getCapabilityVirtualCores() {
209233
return capabilityVirtualCores.value();
210234
}
211235

212-
public long getCapabilityGPUs() {
213-
if (capabilityGPUs == null) {
214-
return 0;
215-
}
236+
public Map<String, Long> getCustomResourceCapability() {
237+
return customResourceCapability.getValues();
238+
}
216239

217-
return capabilityGPUs.value();
240+
public void setCustomResourceCapability(Resource res) {
241+
this.customResourceCapability.set(res);
218242
}
219243

220244
public void incrCapability(Resource res) {
221245
if (res != null) {
222246
capabilityMB.incr(res.getMemorySize());
223247
capabilityVirtualCores.incr(res.getVirtualCores());
224-
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
225-
.get(ResourceInformation.GPU_URI);
226-
if (gpuIndex != null) {
227-
capabilityGPUs.incr(res.
228-
getResourceValue(ResourceInformation.GPU_URI));
248+
if (customResourceCapability != null) {
249+
customResourceCapability.increase(res);
229250
}
230251
}
231252
}
@@ -234,11 +255,8 @@ public void decrCapability(Resource res) {
234255
if (res != null) {
235256
capabilityMB.decr(res.getMemorySize());
236257
capabilityVirtualCores.decr(res.getVirtualCores());
237-
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
238-
.get(ResourceInformation.GPU_URI);
239-
if (gpuIndex != null) {
240-
capabilityGPUs.decr(res.
241-
getResourceValue(ResourceInformation.GPU_URI));
258+
if (customResourceCapability != null) {
259+
customResourceCapability.decrease(res);
242260
}
243261
}
244262
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCSAllocateCustomResource.java

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -231,7 +231,8 @@ public void testClusterMetricsWithGPU()
231231
assertEquals("Cluster Capability Vcores incorrect",
232232
metrics.getCapabilityVirtualCores(), 4 * 8);
233233
assertEquals("Cluster Capability GPUs incorrect",
234-
metrics.getCapabilityGPUs(), 4 * 8);
234+
(metrics.getCustomResourceCapability()
235+
.get(GPU_URI)).longValue(), 4 * 8);
235236

236237
for (RMNode rmNode : rmNodes) {
237238
nodeTracker.removeNode(rmNode.getNodeID());
@@ -243,7 +244,8 @@ public void testClusterMetricsWithGPU()
243244
assertEquals("Cluster Capability Vcores incorrect",
244245
metrics.getCapabilityVirtualCores(), 0);
245246
assertEquals("Cluster Capability GPUs incorrect",
246-
metrics.getCapabilityGPUs(), 0);
247+
(metrics.getCustomResourceCapability()
248+
.get(GPU_URI)).longValue(), 0);
247249
ClusterMetrics.destroy();
248250
}
249251
}

0 commit comments

Comments
 (0)