Skip to content

Commit c46d43a

Browse files
ChenSammi authored and xiaoyuyao committed
HDDS-1553. Add metrics in rack aware container placement policy. (#1361)
1 parent 7357570 commit c46d43a

File tree

13 files changed

+368
-30
lines changed

13 files changed

+368
-30
lines changed

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ public boolean isRunning() {
191191
@VisibleForTesting
192192
@SuppressFBWarnings(value="NN_NAKED_NOTIFY",
193193
justification="Used only for testing")
194-
synchronized void processContainersNow() {
194+
public synchronized void processContainersNow() {
195195
notify();
196196
}
197197

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/ContainerPlacementPolicyFactory.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,17 @@ private ContainerPlacementPolicyFactory() {
4343

4444
public static ContainerPlacementPolicy getPolicy(Configuration conf,
4545
final NodeManager nodeManager, NetworkTopology clusterMap,
46-
final boolean fallback) throws SCMException{
46+
final boolean fallback, SCMContainerPlacementMetrics metrics)
47+
throws SCMException{
4748
final Class<? extends ContainerPlacementPolicy> placementClass = conf
4849
.getClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY,
4950
OZONE_SCM_CONTAINER_PLACEMENT_IMPL_DEFAULT,
5051
ContainerPlacementPolicy.class);
5152
Constructor<? extends ContainerPlacementPolicy> constructor;
5253
try {
5354
constructor = placementClass.getDeclaredConstructor(NodeManager.class,
54-
Configuration.class, NetworkTopology.class, boolean.class);
55+
Configuration.class, NetworkTopology.class, boolean.class,
56+
SCMContainerPlacementMetrics.class);
5557
LOG.info("Create container placement policy of type " +
5658
placementClass.getCanonicalName());
5759
} catch (NoSuchMethodException e) {
@@ -64,7 +66,8 @@ public static ContainerPlacementPolicy getPolicy(Configuration conf,
6466
}
6567

6668
try {
67-
return constructor.newInstance(nodeManager, conf, clusterMap, fallback);
69+
return constructor.newInstance(nodeManager, conf, clusterMap, fallback,
70+
metrics);
6871
} catch (Exception e) {
6972
throw new RuntimeException("Failed to instantiate class " +
7073
placementClass.getCanonicalName() + " for " + e.getMessage());

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementCapacity.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ public final class SCMContainerPlacementCapacity extends SCMCommonPolicy {
7979
*/
8080
public SCMContainerPlacementCapacity(final NodeManager nodeManager,
8181
final Configuration conf, final NetworkTopology networkTopology,
82-
final boolean fallback) {
82+
final boolean fallback, final SCMContainerPlacementMetrics metrics) {
8383
super(nodeManager, conf);
8484
}
8585

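hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementMetrics.java

Original file line numberDiff line numberDiff line change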
@@ -0,0 +1,111 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hdds.scm.container.placement.algorithms;
19+
20+
import com.google.common.annotations.VisibleForTesting;
21+
import org.apache.hadoop.metrics2.MetricsCollector;
22+
import org.apache.hadoop.metrics2.MetricsInfo;
23+
import org.apache.hadoop.metrics2.MetricsSource;
24+
import org.apache.hadoop.metrics2.MetricsSystem;
25+
import org.apache.hadoop.metrics2.annotation.Metric;
26+
import org.apache.hadoop.metrics2.annotation.Metrics;
27+
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
28+
import org.apache.hadoop.metrics2.lib.Interns;
29+
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
30+
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
31+
32+
/**
33+
* This class is for maintaining Topology aware container placement statistics.
34+
*/
35+
@Metrics(about="SCM Container Placement Metrics", context = "ozone")
36+
public class SCMContainerPlacementMetrics implements MetricsSource {
37+
public static final String SOURCE_NAME =
38+
SCMContainerPlacementMetrics.class.getSimpleName();
39+
private static final MetricsInfo RECORD_INFO = Interns.info(SOURCE_NAME,
40+
"SCM Placement Metrics");
41+
private static MetricsRegistry registry;
42+
43+
// total datanode allocation request count
44+
@Metric private MutableCounterLong datanodeRequestCount;
45+
// datanode allocation attempt count, including success, fallback and failed
46+
@Metric private MutableCounterLong datanodeChooseAttemptCount;
47+
// datanode successful allocation count
48+
@Metric private MutableCounterLong datanodeChooseSuccessCount;
49+
// datanode allocated with some allocation constrains compromised
50+
@Metric private MutableCounterLong datanodeChooseFallbackCount;
51+
52+
public SCMContainerPlacementMetrics() {
53+
}
54+
55+
public static SCMContainerPlacementMetrics create() {
56+
MetricsSystem ms = DefaultMetricsSystem.instance();
57+
registry = new MetricsRegistry(RECORD_INFO);
58+
return ms.register(SOURCE_NAME, "SCM Placement Metrics",
59+
new SCMContainerPlacementMetrics());
60+
}
61+
62+
public void incrDatanodeRequestCount(long count) {
63+
System.out.println("request + 1");
64+
this.datanodeRequestCount.incr(count);
65+
}
66+
67+
public void incrDatanodeChooseSuccessCount() {
68+
System.out.println("success + 1");
69+
this.datanodeChooseSuccessCount.incr(1);
70+
}
71+
72+
public void incrDatanodeChooseFallbackCount() {
73+
System.out.println("fallback + 1");
74+
this.datanodeChooseFallbackCount.incr(1);
75+
}
76+
77+
public void incrDatanodeChooseAttemptCount() {
78+
System.out.println("attempt + 1");
79+
this.datanodeChooseAttemptCount.incr(1);
80+
}
81+
82+
public void unRegister() {
83+
MetricsSystem ms = DefaultMetricsSystem.instance();
84+
ms.unregisterSource(SOURCE_NAME);
85+
}
86+
87+
@VisibleForTesting
88+
public long getDatanodeRequestCount() {
89+
return this.datanodeRequestCount.value();
90+
}
91+
92+
@VisibleForTesting
93+
public long getDatanodeChooseSuccessCount() {
94+
return this.datanodeChooseSuccessCount.value();
95+
}
96+
97+
@VisibleForTesting
98+
public long getDatanodeChooseFallbackCount() {
99+
return this.datanodeChooseFallbackCount.value();
100+
}
101+
102+
@VisibleForTesting
103+
public long getDatanodeChooseAttemptCount() {
104+
return this.datanodeChooseAttemptCount.value();
105+
}
106+
107+
@Override
108+
public void getMetrics(MetricsCollector collector, boolean all) {
109+
registry.snapshot(collector.addRecord(registry.info().name()), true);
110+
}
111+
}

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRackAware.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public final class SCMContainerPlacementRackAware extends SCMCommonPolicy {
5353
private boolean fallback;
5454
private static final int RACK_LEVEL = 1;
5555
private static final int MAX_RETRY= 3;
56+
private final SCMContainerPlacementMetrics metrics;
5657

5758
/**
5859
* Constructs a Container Placement with rack awareness.
@@ -66,10 +67,11 @@ public final class SCMContainerPlacementRackAware extends SCMCommonPolicy {
6667
*/
6768
public SCMContainerPlacementRackAware(final NodeManager nodeManager,
6869
final Configuration conf, final NetworkTopology networkTopology,
69-
final boolean fallback) {
70+
final boolean fallback, final SCMContainerPlacementMetrics metrics) {
7071
super(nodeManager, conf);
7172
this.networkTopology = networkTopology;
7273
this.fallback = fallback;
74+
this.metrics = metrics;
7375
}
7476

7577
/**
@@ -93,7 +95,7 @@ public List<DatanodeDetails> chooseDatanodes(
9395
List<DatanodeDetails> excludedNodes, List<DatanodeDetails> favoredNodes,
9496
int nodesRequired, final long sizeRequired) throws SCMException {
9597
Preconditions.checkArgument(nodesRequired > 0);
96-
98+
metrics.incrDatanodeRequestCount(nodesRequired);
9799
int datanodeCount = networkTopology.getNumOfLeafNode(NetConstants.ROOT);
98100
int excludedNodesCount = excludedNodes == null ? 0 : excludedNodes.size();
99101
if (datanodeCount < nodesRequired + excludedNodesCount) {
@@ -241,16 +243,19 @@ private Node chooseNode(List<Node> excludedNodes, Node affinityNode,
241243
int ancestorGen = RACK_LEVEL;
242244
int maxRetry = MAX_RETRY;
243245
List<Node> excludedNodesForCapacity = null;
246+
boolean isFallbacked = false;
244247
while(true) {
245248
Node node = networkTopology.chooseRandom(NetConstants.ROOT, null,
246249
excludedNodes, affinityNode, ancestorGen);
250+
metrics.incrDatanodeChooseAttemptCount();
247251
if (node == null) {
248252
// cannot find the node which meets all constrains
249253
LOG.warn("Failed to find the datanode. excludedNodes:" +
250254
(excludedNodes == null ? "" : excludedNodes.toString()) +
251255
", affinityNode:" +
252256
(affinityNode == null ? "" : affinityNode.getNetworkFullPath()));
253257
if (fallback) {
258+
isFallbacked = true;
254259
// fallback, don't consider the affinity node
255260
if (affinityNode != null) {
256261
affinityNode = null;
@@ -267,11 +272,15 @@ private Node chooseNode(List<Node> excludedNodes, Node affinityNode,
267272
" excludedNodes and affinityNode constrains.", null);
268273
}
269274
if (hasEnoughSpace((DatanodeDetails)node, sizeRequired)) {
270-
LOG.debug("Datanode {} is chosen. Required size is {}",
275+
LOG.warn("Datanode {} is chosen. Required size is {}",
271276
node.toString(), sizeRequired);
272277
if (excludedNodes != null && excludedNodesForCapacity != null) {
273278
excludedNodes.removeAll(excludedNodesForCapacity);
274279
}
280+
metrics.incrDatanodeChooseSuccessCount();
281+
if (isFallbacked) {
282+
metrics.incrDatanodeChooseFallbackCount();
283+
}
275284
return node;
276285
} else {
277286
maxRetry--;

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMContainerPlacementRandom.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public final class SCMContainerPlacementRandom extends SCMCommonPolicy
5151
*/
5252
public SCMContainerPlacementRandom(final NodeManager nodeManager,
5353
final Configuration conf, final NetworkTopology networkTopology,
54-
final boolean fallback) {
54+
final boolean fallback, final SCMContainerPlacementMetrics metrics) {
5555
super(nodeManager, conf);
5656
}
5757

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
import org.apache.hadoop.hdds.scm.block.PendingDeleteHandler;
4343
import org.apache.hadoop.hdds.scm.container.ReplicationManager.ReplicationManagerConfiguration;
4444
import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicyFactory;
45+
import org.apache.hadoop.hdds.scm.container.placement.algorithms
46+
.SCMContainerPlacementMetrics;
4547
import org.apache.hadoop.hdds.scm.net.NetworkTopology;
4648
import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl;
4749
import org.apache.hadoop.hdds.scm.safemode.SafeModeHandler;
@@ -387,9 +389,11 @@ private void initializeSystemManagers(OzoneConfiguration conf,
387389
conf, scmStorageConfig, eventQueue, clusterMap);
388390
}
389391

392+
SCMContainerPlacementMetrics placementMetrics =
393+
SCMContainerPlacementMetrics.create();
390394
ContainerPlacementPolicy containerPlacementPolicy =
391395
ContainerPlacementPolicyFactory.getPolicy(conf, scmNodeManager,
392-
clusterMap, true);
396+
clusterMap, true, placementMetrics);
393397

394398
if (configurator.getPipelineManager() != null) {
395399
pipelineManager = configurator.getPipelineManager();

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ public void testRackAwarePolicy() throws IOException {
100100
.thenReturn(new SCMNodeMetric(storageCapacity, 70L, 30L));
101101

102102
ContainerPlacementPolicy policy = ContainerPlacementPolicyFactory
103-
.getPolicy(conf, nodeManager, cluster, true);
103+
.getPolicy(conf, nodeManager, cluster, true,
104+
SCMContainerPlacementMetrics.create());
104105

105106
int nodeNum = 3;
106107
List<DatanodeDetails> datanodeDetails =
@@ -117,7 +118,7 @@ public void testRackAwarePolicy() throws IOException {
117118
@Test
118119
public void testDefaultPolicy() throws IOException {
119120
ContainerPlacementPolicy policy = ContainerPlacementPolicyFactory
120-
.getPolicy(conf, null, null, true);
121+
.getPolicy(conf, null, null, true, null);
121122
Assert.assertSame(SCMContainerPlacementRandom.class, policy.getClass());
122123
}
123124

@@ -138,14 +139,14 @@ public void testConstuctorNotFound() throws SCMException {
138139
// set a placement class which does't have the right constructor implemented
139140
conf.set(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY,
140141
DummyImpl.class.getName());
141-
ContainerPlacementPolicyFactory.getPolicy(conf, null, null, true);
142+
ContainerPlacementPolicyFactory.getPolicy(conf, null, null, true, null);
142143
}
143144

144145
@Test(expected = RuntimeException.class)
145146
public void testClassNotImplemented() throws SCMException {
146147
// set a placement class not implemented
147148
conf.set(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY,
148149
"org.apache.hadoop.hdds.scm.container.placement.algorithm.HelloWorld");
149-
ContainerPlacementPolicyFactory.getPolicy(conf, null, null, true);
150+
ContainerPlacementPolicyFactory.getPolicy(conf, null, null, true, null);
150151
}
151152
}

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementCapacity.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ public void chooseDatanodes() throws SCMException {
6464
.thenReturn(new SCMNodeMetric(100L, 70L, 30L));
6565

6666
SCMContainerPlacementCapacity scmContainerPlacementRandom =
67-
new SCMContainerPlacementCapacity(mockNodeManager, conf, null, true);
67+
new SCMContainerPlacementCapacity(mockNodeManager, conf, null, true,
68+
null);
6869

6970
List<DatanodeDetails> existingNodes = new ArrayList<>();
7071
existingNodes.add(datanodes.get(0));

0 commit comments

Comments
 (0)