Skip to content

Commit d5eed29

Browse files
leosunli authored and bentito committed
Make DataNodePeerMetrics#minOutlierDetectionSamples configurable (apache#1314). Contributed by Lisheng Sun.
Signed-off-by: sunlisheng <sunlisheng@xiaomi.com> (cherry picked from commit 9152712) (cherry picked from commit b516d2e)
1 parent 6f6fa42 commit d5eed29

File tree

6 files changed

+42
-13
lines changed

6 files changed

+42
-13
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -560,6 +560,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
560560
public static final String DFS_DATANODE_PEER_STATS_ENABLED_KEY =
561561
"dfs.datanode.peer.stats.enabled";
562562
public static final boolean DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT = false;
563+
public static final String
564+
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY =
565+
"dfs.datanode.peer.metrics.min.outlier.detection.samples";
566+
public static final long
567+
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT =
568+
1000;
563569
public static final String DFS_DATANODE_HOST_NAME_KEY =
564570
HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_HOST_NAME_KEY;
565571
public static final String DFS_NAMENODE_CHECKPOINT_DIR_KEY =

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1432,7 +1432,7 @@ void startDataNode(List<StorageLocation> dataDirectories,
14321432

14331433
metrics = DataNodeMetrics.create(getConf(), getDisplayName());
14341434
peerMetrics = dnConf.peerStatsEnabled ?
1435-
DataNodePeerMetrics.create(getDisplayName()) : null;
1435+
DataNodePeerMetrics.create(getDisplayName(), getConf()) : null;
14361436
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
14371437

14381438
ecWorker = new ErasureCodingWorker(getConf(), this);

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@
1919
package org.apache.hadoop.hdfs.server.datanode.metrics;
2020

2121

22-
import com.google.common.annotations.VisibleForTesting;
2322
import org.apache.hadoop.classification.InterfaceAudience;
2423
import org.apache.hadoop.classification.InterfaceStability;
24+
import org.apache.hadoop.conf.Configuration;
2525
import org.apache.hadoop.metrics2.MetricsJsonBuilder;
2626
import org.apache.hadoop.metrics2.lib.MutableRollingAverages;
2727
import org.slf4j.Logger;
@@ -30,6 +30,9 @@
3030
import java.util.Map;
3131
import java.util.concurrent.ThreadLocalRandom;
3232

33+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT;
34+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY;
35+
3336
/**
3437
* This class maintains DataNode peer metrics (e.g. numOps, AvgTime, etc.) for
3538
* various peer operations.
@@ -58,11 +61,13 @@ public class DataNodePeerMetrics {
5861
* for outlier detection. If the number of samples is below this then
5962
* outlier detection is skipped.
6063
*/
61-
@VisibleForTesting
62-
static final long MIN_OUTLIER_DETECTION_SAMPLES = 1000;
64+
private final long minOutlierDetectionSamples;
6365

64-
public DataNodePeerMetrics(final String name) {
66+
public DataNodePeerMetrics(final String name, Configuration conf) {
6567
this.name = name;
68+
minOutlierDetectionSamples = conf.getLong(
69+
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY,
70+
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT);
6671
this.slowNodeDetector = new OutlierDetector(MIN_OUTLIER_DETECTION_NODES,
6772
LOW_THRESHOLD_MS);
6873
sendPacketDownstreamRollingAverages = new MutableRollingAverages("Time");
@@ -72,15 +77,19 @@ public String name() {
7277
return name;
7378
}
7479

80+
long getMinOutlierDetectionSamples() {
81+
return minOutlierDetectionSamples;
82+
}
83+
7584
/**
7685
* Creates an instance of DataNodePeerMetrics, used for registration.
7786
*/
78-
public static DataNodePeerMetrics create(String dnName) {
87+
public static DataNodePeerMetrics create(String dnName, Configuration conf) {
7988
final String name = "DataNodePeerActivity-" + (dnName.isEmpty()
8089
? "UndefinedDataNodeName" + ThreadLocalRandom.current().nextInt()
8190
: dnName.replace(':', '-'));
8291

83-
return new DataNodePeerMetrics(name);
92+
return new DataNodePeerMetrics(name, conf);
8493
}
8594

8695
/**
@@ -122,7 +131,7 @@ public Map<String, Double> getOutliers() {
122131
// The metric name is the datanode ID.
123132
final Map<String, Double> stats =
124133
sendPacketDownstreamRollingAverages.getStats(
125-
MIN_OUTLIER_DETECTION_SAMPLES);
134+
minOutlierDetectionSamples);
126135
LOG.trace("DataNodePeerMetrics: Got stats: {}", stats);
127136

128137
return slowNodeDetector.getOutliers(stats);

hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2103,6 +2103,15 @@
21032103
</description>
21042104
</property>
21052105

2106+
<property>
2107+
<name>dfs.datanode.peer.metrics.min.outlier.detection.samples</name>
2108+
<value>1000</value>
2109+
<description>
2110+
Minimum number of packet send samples which are required to qualify for outlier detection.
2111+
If the number of samples is below this then outlier detection is skipped.
2112+
</description>
2113+
</property>
2114+
21062115
<property>
21072116
<name>dfs.datanode.outliers.report.interval</name>
21082117
<value>30m</value>

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodePeerMetrics.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ public void testGetSendPacketDownstreamAvgInfo() throws Exception {
4646
conf.setBoolean(DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY, true);
4747

4848
final DataNodePeerMetrics peerMetrics = DataNodePeerMetrics.create(
49-
"Sample-DataNode");
49+
"Sample-DataNode", conf);
5050
MetricsTestHelper.replaceRollingAveragesScheduler(
5151
peerMetrics.getSendPacketDownstreamRollingAverages(),
5252
numWindows, windowSize, TimeUnit.SECONDS);

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/metrics/TestDataNodeOutlierDetectionViaMetrics.java

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@
1919
package org.apache.hadoop.hdfs.server.datanode.metrics;
2020

2121
import com.google.common.base.Supplier;
22+
import org.apache.hadoop.conf.Configuration;
23+
import org.apache.hadoop.hdfs.HdfsConfiguration;
2224
import org.apache.hadoop.metrics2.lib.MetricsTestHelper;
2325
import org.apache.hadoop.test.GenericTestUtils;
2426
import org.apache.log4j.Level;
@@ -61,10 +63,13 @@ public class TestDataNodeOutlierDetectionViaMetrics {
6163

6264
private Random random = new Random(System.currentTimeMillis());
6365

66+
private Configuration conf;
67+
6468
@Before
6569
public void setup() {
6670
GenericTestUtils.setLogLevel(DataNodePeerMetrics.LOG, Level.ALL);
6771
GenericTestUtils.setLogLevel(OutlierDetector.LOG, Level.ALL);
72+
conf = new HdfsConfiguration();
6873
}
6974

7075
/**
@@ -75,7 +80,7 @@ public void testOutlierIsDetected() throws Exception {
7580
final String slowNodeName = "SlowNode";
7681

7782
DataNodePeerMetrics peerMetrics = new DataNodePeerMetrics(
78-
"PeerMetrics-For-Test");
83+
"PeerMetrics-For-Test", conf);
7984

8085
MetricsTestHelper.replaceRollingAveragesScheduler(
8186
peerMetrics.getSendPacketDownstreamRollingAverages(),
@@ -107,7 +112,7 @@ public Boolean get() {
107112
@Test
108113
public void testWithNoOutliers() throws Exception {
109114
DataNodePeerMetrics peerMetrics = new DataNodePeerMetrics(
110-
"PeerMetrics-For-Test");
115+
"PeerMetrics-For-Test", conf);
111116

112117
MetricsTestHelper.replaceRollingAveragesScheduler(
113118
peerMetrics.getSendPacketDownstreamRollingAverages(),
@@ -134,7 +139,7 @@ public void injectFastNodesSamples(DataNodePeerMetrics peerMetrics) {
134139
final String nodeName = "FastNode-" + nodeIndex;
135140
LOG.info("Generating stats for node {}", nodeName);
136141
for (int i = 0;
137-
i < 2 * DataNodePeerMetrics.MIN_OUTLIER_DETECTION_SAMPLES;
142+
i < 2 * peerMetrics.getMinOutlierDetectionSamples();
138143
++i) {
139144
peerMetrics.addSendPacketDownstream(
140145
nodeName, random.nextInt(FAST_NODE_MAX_LATENCY_MS));
@@ -151,7 +156,7 @@ public void injectSlowNodeSamples(
151156

152157
// And the one slow node.
153158
for (int i = 0;
154-
i < 2 * DataNodePeerMetrics.MIN_OUTLIER_DETECTION_SAMPLES;
159+
i < 2 * peerMetrics.getMinOutlierDetectionSamples();
155160
++i) {
156161
peerMetrics.addSendPacketDownstream(
157162
slowNodeName, SLOW_NODE_LATENCY_MS);

0 commit comments

Comments
 (0)