Skip to content

Commit 5d6ab15

Browse files
authored
YARN-11354. [Federation] Add Yarn Router's NodeLabel Web Page. (#5073)
1 parent 2ba982a commit 5d6ab15

File tree

22 files changed

+544
-29
lines changed

22 files changed

+544
-29
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Resource.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,10 @@ public String toString() {
467467
return getFormattedString(String.valueOf(getMemorySize()));
468468
}
469469

470+
/**
 * Returns a formatted string form of this resource.
 *
 * <p>This is a thin public alias: it simply returns whatever the
 * no-argument {@code getFormattedString()} produces.
 *
 * @return the string returned by {@code getFormattedString()}
 */
public String toFormattedString() {
471+
return getFormattedString();
472+
}
473+
470474
private String getFormattedString(String memory) {
471475
StringBuilder sb = new StringBuilder();
472476

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWSConsts.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ public final class RMWSConsts {
126126
/** Path for {@code RMWebServiceProtocol#getClusterNodeLabels}. */
127127
public static final String GET_NODE_LABELS = "/get-node-labels";
128128

129+
/** Path for {@code RMWebServiceProtocol#getRMNodeLabels}. */
130+
public static final String GET_RM_NODE_LABELS = "/get-rm-node-labels";
131+
129132
/** Path for {@code RMWebServiceProtocol#addToClusterNodeLabels}. */
130133
public static final String ADD_NODE_LABELS = "/add-node-labels";
131134

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServiceProtocol.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,8 @@ Response updateAppState(AppState targetState, HttpServletRequest hsr,
325325
*/
326326
NodeToLabelsInfo getNodeToLabels(HttpServletRequest hsr) throws IOException;
327327

328+
/**
 * This method retrieves the node labels known to the ResourceManager as a
 * {@link NodeLabelsInfo}, and it is reachable by using
 * {@link RMWSConsts#GET_RM_NODE_LABELS}.
 *
 * @param hsr the servlet request
 * @return the node labels information
 * @throws IOException if the node labels cannot be retrieved
 */
NodeLabelsInfo getRMNodeLabels(HttpServletRequest hsr) throws IOException;
329+
328330
/**
329331
* This method retrieves all the node within multiple node labels in the
330332
* cluster, and it is reachable by using {@link RMWSConsts#LABEL_MAPPINGS}.

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@
129129
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
130130
import org.apache.hadoop.yarn.factories.RecordFactory;
131131
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
132+
import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
132133
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
133134
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest;
134135
import org.apache.hadoop.yarn.server.resourcemanager.AdminService;
@@ -138,6 +139,7 @@
138139
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
139140
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
140141
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NodeLabelsUtils;
142+
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
141143
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
142144
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
143145
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -1404,6 +1406,32 @@ public NodeLabelsInfo getClusterNodeLabels(@Context HttpServletRequest hsr)
14041406
return new NodeLabelsInfo(nodeLabelsInfo);
14051407
}
14061408

1409+
/**
 * REST endpoint ({@code GET} on {@link RMWSConsts#GET_RM_NODE_LABELS}) that
 * reports every label returned by the node label manager's
 * {@code pullRMNodeLabelsInfo()}.
 *
 * <p>For each label the response entry carries the label name, its
 * exclusivity flag, a {@link PartitionInfo} wrapping the label's resource,
 * and the number of active NodeManagers. Produces JSON or XML.
 *
 * @param hsr the servlet request (not read by this method body)
 * @return a {@link NodeLabelsInfo} with one {@link NodeLabelInfo} per label
 * @throws IOException declared by the web service protocol
 */
@GET
1410+
@Path(RMWSConsts.GET_RM_NODE_LABELS)
1411+
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8,
1412+
MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
1413+
public NodeLabelsInfo getRMNodeLabels(@Context HttpServletRequest hsr)
1414+
throws IOException {
1415+
1416+
// NOTE(review): presumably performs the common setup/access handling shared
// by read-only endpoints; its body is not visible here.
initForReadableEndpoints();
1417+
RMNodeLabelsManager nlm = rm.getRMContext().getNodeLabelManager();
1418+
1419+
ArrayList<NodeLabelInfo> nodeLabelsInfo = new ArrayList<>();
1420+
for (RMNodeLabel info : nlm.pullRMNodeLabelsInfo()) {
1421+
// An empty label name is reported under the default partition constant
// instead of as an empty string.
String labelName = info.getLabelName().isEmpty() ?
1422+
NodeLabel.DEFAULT_NODE_LABEL_PARTITION : info.getLabelName();
1423+
int activeNMs = info.getNumActiveNMs();
1424+
PartitionInfo partitionInfo =
1425+
new PartitionInfo(new ResourceInfo(info.getResource()));
1426+
NodeLabel nodeLabel = NodeLabel.newInstance(labelName, info.getIsExclusive());
1427+
NodeLabelInfo nodeLabelInfo = new NodeLabelInfo(nodeLabel, partitionInfo);
1428+
// The active NodeManager count is not a constructor argument here, so it is
// set separately.
nodeLabelInfo.setActiveNMs(activeNMs);
1429+
nodeLabelsInfo.add(nodeLabelInfo);
1430+
}
1431+
1432+
return new NodeLabelsInfo(nodeLabelsInfo);
1433+
}
1434+
14071435
@POST
14081436
@Path(RMWSConsts.ADD_NODE_LABELS)
14091437
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8,

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeLabelInfo.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public class NodeLabelInfo {
3131
private String name;
3232
private boolean exclusivity;
3333
private PartitionInfo partitionInfo;
34+
private Integer activeNMs;
3435

3536
public NodeLabelInfo() {
3637
// JAXB needs this
@@ -68,6 +69,26 @@ public PartitionInfo getPartitionInfo() {
6869
return partitionInfo;
6970
}
7071

72+
/**
 * Returns the number of active NodeManagers recorded for this label.
 *
 * @return the stored count; the field is a boxed {@code Integer} with no
 *         initializer at its declaration, so this is presumably {@code null}
 *         until {@code setActiveNMs} is called — confirm against the
 *         constructors
 */
public Integer getActiveNMs() {
73+
return activeNMs;
74+
}
75+
76+
/**
 * Sets the number of active NodeManagers recorded for this label.
 *
 * @param activeNMs the count to store; stored as given, without validation
 */
public void setActiveNMs(Integer activeNMs) {
77+
this.activeNMs = activeNMs;
78+
}
79+
80+
/**
 * Sets the name of this node label.
 *
 * @param name the label name to store; stored as given, without validation
 */
public void setName(String name) {
81+
this.name = name;
82+
}
83+
84+
/**
 * Sets the exclusivity flag of this node label.
 *
 * @param exclusivity the exclusivity value to store
 */
public void setExclusivity(boolean exclusivity) {
85+
this.exclusivity = exclusivity;
86+
}
87+
88+
/**
 * Sets the partition information attached to this node label.
 *
 * @param partitionInfo the partition information to store; stored as given,
 *                      without a null check
 */
public void setPartitionInfo(PartitionInfo partitionInfo) {
89+
this.partitionInfo = partitionInfo;
90+
}
91+
7192
@Override
7293
public boolean equals(Object obj) {
7394
if (this == obj) {

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ResourceInfo.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ public String toString() {
7272
return getResource().toString();
7373
}
7474

75+
/**
 * Returns the formatted string form of the wrapped resource.
 *
 * @return the result of calling {@code toFormattedString()} on the value
 *         returned by {@code getResource()}
 */
public String toFormattedString() {
76+
return getResource().toFormattedString();
77+
}
78+
7579
public void setMemory(int memory) {
7680
if (resources == null) {
7781
resources = Resource.newInstance(memory, vCores);

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ public final class RouterMetrics {
121121
private MutableGaugeInt numGetAppTimeoutFailedRetrieved;
122122
@Metric("# of getAppTimeouts failed to be retrieved")
123123
private MutableGaugeInt numGetAppTimeoutsFailedRetrieved;
124+
@Metric("# of getRMNodeLabels failed to be retrieved")
125+
private MutableGaugeInt numGetRMNodeLabelsFailedRetrieved;
124126
@Metric("# of checkUserAccessToQueue failed to be retrieved")
125127
private MutableGaugeInt numCheckUserAccessToQueueFailedRetrieved;
126128

@@ -205,6 +207,8 @@ public final class RouterMetrics {
205207
private MutableRate totalSucceededGetAppTimeoutRetrieved;
206208
@Metric("Total number of successful Retrieved GetAppTimeouts and latency(ms)")
207209
private MutableRate totalSucceededGetAppTimeoutsRetrieved;
210+
@Metric("Total number of successful Retrieved GetRMNodeLabels and latency(ms)")
211+
private MutableRate totalSucceededGetRMNodeLabelsRetrieved;
208212
@Metric("Total number of successful Retrieved CheckUserAccessToQueue and latency(ms)")
209213
private MutableRate totalSucceededCheckUserAccessToQueueRetrieved;
210214

@@ -251,6 +255,7 @@ public final class RouterMetrics {
251255
private MutableQuantiles getUpdateQueueLatency;
252256
private MutableQuantiles getAppTimeoutLatency;
253257
private MutableQuantiles getAppTimeoutsLatency;
258+
private MutableQuantiles getRMNodeLabelsLatency;
254259
private MutableQuantiles checkUserAccessToQueueLatency;
255260

256261
private static volatile RouterMetrics instance = null;
@@ -405,6 +410,9 @@ private RouterMetrics() {
405410
getAppTimeoutsLatency = registry.newQuantiles("getAppTimeoutsLatency",
406411
"latency of get apptimeouts timeouts", "ops", "latency", 10);
407412

413+
getRMNodeLabelsLatency = registry.newQuantiles("getRMNodeLabelsLatency",
414+
"latency of get rmnodelabels timeouts", "ops", "latency", 10);
415+
408416
checkUserAccessToQueueLatency = registry.newQuantiles("checkUserAccessToQueueLatency",
409417
"latency of get apptimeouts timeouts", "ops", "latency", 10);
410418
}
@@ -628,6 +636,11 @@ public long getNumSucceededGetAppTimeoutsRetrieved() {
628636
return totalSucceededGetAppTimeoutsRetrieved.lastStat().numSamples();
629637
}
630638

639+
/**
 * Returns the sample count of the last statistic held by the
 * successful-getRMNodeLabels rate metric.
 *
 * @return {@code lastStat().numSamples()} of
 *         {@code totalSucceededGetRMNodeLabelsRetrieved}
 */
@VisibleForTesting
640+
public long getNumSucceededGetRMNodeLabelsRetrieved() {
641+
return totalSucceededGetRMNodeLabelsRetrieved.lastStat().numSamples();
642+
}
643+
631644
@VisibleForTesting
632645
public long getNumSucceededCheckUserAccessToQueueRetrievedRetrieved() {
633646
return totalSucceededCheckUserAccessToQueueRetrieved.lastStat().numSamples();
@@ -833,6 +846,11 @@ public double getLatencySucceededGetAppTimeoutsRetrieved() {
833846
return totalSucceededGetAppTimeoutsRetrieved.lastStat().mean();
834847
}
835848

849+
/**
 * Returns the mean of the last statistic held by the
 * successful-getRMNodeLabels rate metric.
 *
 * @return {@code lastStat().mean()} of
 *         {@code totalSucceededGetRMNodeLabelsRetrieved}; the metric's
 *         description states the recorded values are latencies in ms
 */
@VisibleForTesting
850+
public double getLatencySucceededGetRMNodeLabelsRetrieved() {
851+
return totalSucceededGetRMNodeLabelsRetrieved.lastStat().mean();
852+
}
853+
836854
@VisibleForTesting
837855
public double getLatencySucceededCheckUserAccessToQueueRetrieved() {
838856
return totalSucceededCheckUserAccessToQueueRetrieved.lastStat().mean();
@@ -1019,6 +1037,10 @@ public int getAppTimeoutsFailedRetrieved() {
10191037
return numGetAppTimeoutsFailedRetrieved.value();
10201038
}
10211039

1040+
/**
 * Returns the current value of the failed-getRMNodeLabels gauge.
 *
 * @return the value of {@code numGetRMNodeLabelsFailedRetrieved}
 */
public int getRMNodeLabelsFailedRetrieved() {
1041+
return numGetRMNodeLabelsFailedRetrieved.value();
1042+
}
1043+
10221044
/**
 * Returns the current value of the failed-checkUserAccessToQueue gauge.
 *
 * @return the value of {@code numCheckUserAccessToQueueFailedRetrieved}
 */
public int getCheckUserAccessToQueueFailedRetrieved() {
10231045
return numCheckUserAccessToQueueFailedRetrieved.value();
10241046
}
@@ -1223,6 +1245,11 @@ public void succeededGetAppTimeoutsRetrieved(long duration) {
12231245
getAppTimeoutsLatency.add(duration);
12241246
}
12251247

1248+
/**
 * Records one successful getRMNodeLabels call.
 *
 * <p>The duration is added to both the rate metric and the latency
 * quantiles for getRMNodeLabels.
 *
 * @param duration the elapsed time of the call; callers in view pass a
 *                 difference of two {@code clock.getTime()} readings
 */
public void succeededGetRMNodeLabelsRetrieved(long duration) {
1249+
totalSucceededGetRMNodeLabelsRetrieved.add(duration);
1250+
getRMNodeLabelsLatency.add(duration);
1251+
}
1252+
12261253
public void succeededCheckUserAccessToQueueRetrieved(long duration) {
12271254
totalSucceededCheckUserAccessToQueueRetrieved.add(duration);
12281255
checkUserAccessToQueueLatency.add(duration);
@@ -1388,6 +1415,10 @@ public void incrGetAppTimeoutsFailedRetrieved() {
13881415
numGetAppTimeoutsFailedRetrieved.incr();
13891416
}
13901417

1418+
/**
 * Increments the failed-getRMNodeLabels gauge by calling {@code incr()} on
 * {@code numGetRMNodeLabelsFailedRetrieved}.
 */
public void incrGetRMNodeLabelsFailedRetrieved() {
1419+
numGetRMNodeLabelsFailedRetrieved.incr();
1420+
}
1421+
13911422
/**
 * Increments the failed-checkUserAccessToQueue gauge by calling
 * {@code incr()} on {@code numCheckUserAccessToQueueFailedRetrieved}.
 */
public void incrCheckUserAccessToQueueFailedRetrieved() {
13921423
numCheckUserAccessToQueueFailedRetrieved.incr();
13931424
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/DefaultRequestInterceptorREST.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,4 +602,12 @@ public Response signalToContainer(String containerId, String command,
602602
+ containerId + "/" + RMWSConsts.SIGNAL + "/" + command, null,
603603
null, getConf(), client);
604604
}
605+
606+
/**
 * Forwards a getRMNodeLabels request to the web service at
 * {@code webAppAddress}.
 *
 * <p>The call is issued as an HTTP {@code GET} against
 * {@code RMWSConsts.RM_WEB_SERVICE_PATH + RMWSConsts.GET_RM_NODE_LABELS}
 * through {@code RouterWebServiceUtil.genericForward}, and the response is
 * returned as a {@link NodeLabelsInfo}.
 *
 * @param hsr the incoming servlet request, passed through to the forwarder
 * @return the {@link NodeLabelsInfo} produced by the forwarded call
 */
@Override
607+
public NodeLabelsInfo getRMNodeLabels(HttpServletRequest hsr) {
608+
return RouterWebServiceUtil.genericForward(webAppAddress, hsr,
609+
NodeLabelsInfo.class, HTTPMethods.GET,
610+
RMWSConsts.RM_WEB_SERVICE_PATH + RMWSConsts.GET_RM_NODE_LABELS,
611+
// NOTE(review): the two nulls are presumably "no request body" and "no extra
// parameters" — confirm against genericForward's signature.
null, null, getConf(), client);
612+
}
605613
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1273,10 +1273,39 @@ public NodeToLabelsInfo getNodeToLabels(HttpServletRequest hsr)
12731273
routerMetrics.incrNodeToLabelsFailedRetrieved();
12741274
RouterServerUtil.logAndThrowIOException("getNodeToLabels error.", e);
12751275
}
1276-
routerMetrics.incrGetAppStatisticsFailedRetrieved();
1276+
routerMetrics.incrNodeToLabelsFailedRetrieved();
12771277
throw new RuntimeException("getNodeToLabels Failed.");
12781278
}
12791279

1280+
/**
 * Collects node label information from all active sub-clusters and merges
 * it into one {@link NodeLabelsInfo}.
 *
 * <p>The method invokes {@code getRMNodeLabels} on every active sub-cluster
 * via {@code invokeConcurrent}, then merges the per-sub-cluster results with
 * {@code RouterWebServiceUtil.mergeNodeLabelsInfo}. On a non-null merged
 * result it records the elapsed time in the router metrics and returns it.
 * Every other path increments the failure metric.
 *
 * @param hsr the servlet request; a clone of it is sent to the sub-clusters
 * @return the merged node label information
 * @throws IOException presumably raised by
 *         {@code RouterServerUtil.logAndThrowIOException} when looking up the
 *         sub-clusters or invoking them fails — confirm against that helper
 * @throws RuntimeException if no result was returned and no exception
 *         propagated from the handlers above
 */
@Override
1281+
public NodeLabelsInfo getRMNodeLabels(HttpServletRequest hsr) throws IOException {
1282+
try {
1283+
long startTime = clock.getTime();
1284+
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
1285+
// A clone of the request, not the original, is passed to the remote calls.
final HttpServletRequest hsrCopy = clone(hsr);
1286+
// Describe the remote call by method name plus argument types and values.
Class[] argsClasses = new Class[]{HttpServletRequest.class};
1287+
Object[] args = new Object[]{hsrCopy};
1288+
ClientMethod remoteMethod = new ClientMethod("getRMNodeLabels", argsClasses, args);
1289+
Map<SubClusterInfo, NodeLabelsInfo> nodeToLabelsInfoMap =
1290+
invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
1291+
// Despite the "nodeToLabels" local names, these hold NodeLabelsInfo values.
NodeLabelsInfo nodeToLabelsInfo =
1292+
RouterWebServiceUtil.mergeNodeLabelsInfo(nodeToLabelsInfoMap);
1293+
if (nodeToLabelsInfo != null) {
1294+
long stopTime = clock.getTime();
1295+
routerMetrics.succeededGetRMNodeLabelsRetrieved(stopTime - startTime);
1296+
return nodeToLabelsInfo;
1297+
}
1298+
} catch (NotFoundException e) {
1299+
routerMetrics.incrGetRMNodeLabelsFailedRetrieved();
1300+
RouterServerUtil.logAndThrowIOException("get all active sub cluster(s) error.", e);
1301+
} catch (YarnException e) {
1302+
routerMetrics.incrGetRMNodeLabelsFailedRetrieved();
1303+
RouterServerUtil.logAndThrowIOException("getRMNodeLabels error.", e);
1304+
}
1305+
// Reached when the merged result was null, or if a catch block above
// completed without throwing.
routerMetrics.incrGetRMNodeLabelsFailedRetrieved();
1306+
throw new RuntimeException("getRMNodeLabels Failed.");
1307+
}
1308+
12801309
@Override
12811310
public LabelsToNodesInfo getLabelsToNodes(Set<String> labels)
12821311
throws IOException {

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/NavBlock.java

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
import com.google.inject.Inject;
2222
import org.apache.hadoop.conf.Configuration;
23-
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
2423
import org.apache.hadoop.yarn.server.router.Router;
2524
import org.apache.hadoop.yarn.server.webapp.WebPageUtils;
2625
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet;
@@ -49,35 +48,14 @@ public void render(Block html) {
4948

5049
List<String> subClusterIds = getActiveSubClusterIds();
5150

52-
Hamlet.UL<Hamlet.LI<Hamlet.UL<Hamlet.DIV<Hamlet>>>> subAppsList1 =
53-
mainList.li().a(url("nodes"), "Nodes").ul().$style("padding:0.3em 1em 0.1em 2em");
54-
5551
// ### nodes info
56-
subAppsList1.li().__();
57-
for (String subClusterId : subClusterIds) {
58-
subAppsList1.li().a(url("nodes", subClusterId), subClusterId).__();
59-
}
60-
subAppsList1.__().__();
52+
initNodesMenu(mainList, subClusterIds);
6153

62-
// ### applications info
63-
Hamlet.UL<Hamlet.LI<Hamlet.UL<Hamlet.DIV<Hamlet>>>> subAppsList2 =
64-
mainList.li().a(url("apps"), "Applications").ul();
54+
// ### nodelabels info
55+
initNodeLabelsMenu(mainList, subClusterIds);
6556

66-
subAppsList2.li().__();
67-
for (String subClusterId : subClusterIds) {
68-
Hamlet.LI<Hamlet.UL<Hamlet.LI<Hamlet.UL<Hamlet.DIV<Hamlet>>>>> subAppsList3 = subAppsList2.
69-
li().a(url("apps", subClusterId), subClusterId);
70-
Hamlet.UL<Hamlet.LI<Hamlet.UL<Hamlet.LI<Hamlet.UL<Hamlet.DIV<Hamlet>>>>>> subAppsList4 =
71-
subAppsList3.ul().$style("padding:0.3em 1em 0.1em 2em");
72-
subAppsList4.li().__();
73-
for (YarnApplicationState state : YarnApplicationState.values()) {
74-
subAppsList4.
75-
li().a(url("apps", subClusterId, state.toString()), state.toString()).__();
76-
}
77-
subAppsList4.li().__().__();
78-
subAppsList3.__();
79-
}
80-
subAppsList2.__().__();
57+
// ### applications info
58+
initApplicationsMenu(mainList, subClusterIds);
8159

8260
// ### tools
8361
Hamlet.DIV<Hamlet> sectionBefore = mainList.__();

0 commit comments

Comments
 (0)