Skip to content

Commit ee450bb

Browse files
authored
HDFS-16528. Reconfigure slow peer enable for Namenode (#4251)
1 parent cc204c9 commit ee450bb

File tree

6 files changed

+212
-40
lines changed

6 files changed

+212
-40
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,6 @@ public class DatanodeManager {
193193
private final HashMap<String, Integer> datanodesSoftwareVersions =
194194
new HashMap<>(4, 0.75f);
195195

196-
/**
197-
* True if we should process latency metrics from downstream peers.
198-
*/
199-
private final boolean dataNodePeerStatsEnabled;
200196
/**
201197
* True if we should process latency metrics from individual DN disks.
202198
*/
@@ -210,7 +206,7 @@ public class DatanodeManager {
210206
private static final String IP_PORT_SEPARATOR = ":";
211207

212208
@Nullable
213-
private final SlowPeerTracker slowPeerTracker;
209+
private SlowPeerTracker slowPeerTracker;
214210
private static Set<String> slowNodesUuidSet = Sets.newConcurrentHashSet();
215211
private Daemon slowPeerCollectorDaemon;
216212
private final long slowPeerCollectionInterval;
@@ -247,24 +243,23 @@ public class DatanodeManager {
247243
this.datanodeAdminManager = new DatanodeAdminManager(namesystem,
248244
blockManager, heartbeatManager);
249245
this.fsClusterStats = newFSClusterStats();
250-
this.dataNodePeerStatsEnabled = conf.getBoolean(
251-
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY,
252-
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT);
253246
this.dataNodeDiskStatsEnabled = Util.isDiskStatsEnabled(conf.getInt(
254247
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY,
255248
DFSConfigKeys.
256249
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_DEFAULT));
257250
final Timer timer = new Timer();
258-
this.slowPeerTracker = dataNodePeerStatsEnabled ?
259-
new SlowPeerTracker(conf, timer) : null;
251+
final boolean dataNodePeerStatsEnabledVal =
252+
conf.getBoolean(DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY,
253+
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT);
254+
initSlowPeerTracker(conf, timer, dataNodePeerStatsEnabledVal);
260255
this.maxSlowPeerReportNodes = conf.getInt(
261256
DFSConfigKeys.DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY,
262257
DFSConfigKeys.DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_DEFAULT);
263258
this.slowPeerCollectionInterval = conf.getTimeDuration(
264259
DFSConfigKeys.DFS_NAMENODE_SLOWPEER_COLLECT_INTERVAL_KEY,
265260
DFSConfigKeys.DFS_NAMENODE_SLOWPEER_COLLECT_INTERVAL_DEFAULT,
266261
TimeUnit.MILLISECONDS);
267-
if (slowPeerTracker != null) {
262+
if (slowPeerTracker.isSlowPeerTrackerEnabled()) {
268263
startSlowPeerCollector();
269264
}
270265
this.slowDiskTracker = dataNodeDiskStatsEnabled ?
@@ -366,6 +361,21 @@ public class DatanodeManager {
366361
DFSConfigKeys.DFS_NAMENODE_BLOCKS_PER_POSTPONEDBLOCKS_RESCAN_KEY_DEFAULT);
367362
}
368363

364+
/**
365+
* Determines whether slow peer tracker should be enabled. If dataNodePeerStatsEnabledVal is
366+
* true, slow peer tracker is initialized.
367+
*
368+
* @param conf The configuration to use while initializing slowPeerTracker.
369+
* @param timer Timer object for slowPeerTracker.
370+
* @param dataNodePeerStatsEnabled To determine whether slow peer tracking should be enabled.
371+
*/
372+
public void initSlowPeerTracker(Configuration conf, Timer timer,
373+
boolean dataNodePeerStatsEnabled) {
374+
this.slowPeerTracker = dataNodePeerStatsEnabled ?
375+
new SlowPeerTracker(conf, timer) :
376+
new SlowPeerDisabledTracker(conf, timer);
377+
}
378+
369379
private void startSlowPeerCollector() {
370380
if (slowPeerCollectorDaemon != null) {
371381
return;
@@ -1856,12 +1866,13 @@ public DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg,
18561866
nodeinfo.setBalancerBandwidth(0);
18571867
}
18581868

1859-
if (slowPeerTracker != null) {
1869+
Preconditions.checkNotNull(slowPeerTracker, "slowPeerTracker should not be un-assigned");
1870+
1871+
if (slowPeerTracker.isSlowPeerTrackerEnabled()) {
18601872
final Map<String, Double> slowPeersMap = slowPeers.getSlowPeers();
18611873
if (!slowPeersMap.isEmpty()) {
18621874
if (LOG.isDebugEnabled()) {
1863-
LOG.debug("DataNode " + nodeReg + " reported slow peers: " +
1864-
slowPeersMap);
1875+
LOG.debug("DataNode " + nodeReg + " reported slow peers: " + slowPeersMap);
18651876
}
18661877
for (String slowNodeId : slowPeersMap.keySet()) {
18671878
slowPeerTracker.addReport(slowNodeId, nodeReg.getIpcAddr(false));
@@ -2109,7 +2120,8 @@ public void setBlockInvalidateLimit(int configuredBlockInvalidateLimit) {
21092120
* @return
21102121
*/
21112122
public String getSlowPeersReport() {
2112-
return slowPeerTracker != null ? slowPeerTracker.getJson() : null;
2123+
Preconditions.checkNotNull(slowPeerTracker, "slowPeerTracker should not be un-assigned");
2124+
return slowPeerTracker.getJson();
21132125
}
21142126

21152127
/**
@@ -2118,11 +2130,9 @@ public String getSlowPeersReport() {
21182130
*/
21192131
public Set<String> getSlowPeersUuidSet() {
21202132
Set<String> slowPeersUuidSet = Sets.newConcurrentHashSet();
2121-
if (slowPeerTracker == null) {
2122-
return slowPeersUuidSet;
2123-
}
2124-
ArrayList<String> slowNodes =
2125-
slowPeerTracker.getSlowNodes(maxSlowPeerReportNodes);
2133+
List<String> slowNodes;
2134+
Preconditions.checkNotNull(slowPeerTracker, "slowPeerTracker should not be un-assigned");
2135+
slowNodes = slowPeerTracker.getSlowNodes(maxSlowPeerReportNodes);
21262136
for (String slowNode : slowNodes) {
21272137
if (StringUtils.isBlank(slowNode)
21282138
|| !slowNode.contains(IP_PORT_SEPARATOR)) {
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
* <p>
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
* <p>
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.hdfs.server.blockmanagement;
20+
21+
import java.util.List;
22+
import java.util.Map;
23+
import java.util.Set;
24+
import java.util.SortedSet;
25+
26+
import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList;
27+
import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap;
28+
import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet;
29+
import org.slf4j.Logger;
30+
import org.slf4j.LoggerFactory;
31+
32+
import org.apache.hadoop.classification.InterfaceAudience;
33+
import org.apache.hadoop.conf.Configuration;
34+
import org.apache.hadoop.hdfs.DFSConfigKeys;
35+
import org.apache.hadoop.util.Preconditions;
36+
import org.apache.hadoop.util.Timer;
37+
38+
/**
39+
* Disabled tracker for slow peers. To be used when dfs.datanode.peer.stats.enabled is disabled.
40+
*/
41+
@InterfaceAudience.Private
42+
public class SlowPeerDisabledTracker extends SlowPeerTracker {
43+
44+
private static final Logger LOG = LoggerFactory.getLogger(SlowPeerDisabledTracker.class);
45+
46+
public SlowPeerDisabledTracker(Configuration conf, Timer timer) {
47+
super(conf, timer);
48+
final boolean dataNodePeerStatsEnabledVal =
49+
conf.getBoolean(DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY,
50+
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT);
51+
Preconditions.checkArgument(!dataNodePeerStatsEnabledVal,
52+
"SlowPeerDisabledTracker should only be used for disabled slow peer stats.");
53+
}
54+
55+
@Override
56+
public boolean isSlowPeerTrackerEnabled() {
57+
return false;
58+
}
59+
60+
@Override
61+
public void addReport(String slowNode, String reportingNode) {
62+
LOG.trace("Adding slow peer report is disabled. To enable it, please enable config {}.",
63+
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY);
64+
}
65+
66+
@Override
67+
public Set<String> getReportsForNode(String slowNode) {
68+
LOG.trace("Retrieval of slow peer report is disabled. To enable it, please enable config {}.",
69+
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY);
70+
return ImmutableSet.of();
71+
}
72+
73+
@Override
74+
public Map<String, SortedSet<String>> getReportsForAllDataNodes() {
75+
LOG.trace("Retrieval of slow peer report for all nodes is disabled. "
76+
+ "To enable it, please enable config {}.",
77+
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY);
78+
return ImmutableMap.of();
79+
}
80+
81+
@Override
82+
public String getJson() {
83+
LOG.trace("Retrieval of slow peer reports as json string is disabled. "
84+
+ "To enable it, please enable config {}.",
85+
DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY);
86+
return null;
87+
}
88+
89+
@Override
90+
public List<String> getSlowNodes(int numNodes) {
91+
return ImmutableList.of();
92+
}
93+
94+
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import java.util.Collections;
4040
import java.util.Comparator;
4141
import java.util.HashMap;
42+
import java.util.List;
4243
import java.util.Map;
4344
import java.util.PriorityQueue;
4445
import java.util.Set;
@@ -109,6 +110,15 @@ public SlowPeerTracker(Configuration conf, Timer timer) {
109110
DFSConfigKeys.DFS_DATANODE_MAX_NODES_TO_REPORT_DEFAULT);
110111
}
111112

113+
/**
114+
* If SlowPeerTracker is enabled, return true, else returns false.
115+
*
116+
* @return true if slow peer tracking is enabled, else false.
117+
*/
118+
public boolean isSlowPeerTrackerEnabled() {
119+
return true;
120+
}
121+
112122
/**
113123
* Add a new report. DatanodeIds can be the DataNodeIds or addresses
114124
* We don't care as long as the caller is consistent.
@@ -239,7 +249,7 @@ public SortedSet<String> getReportingNodes() {
239249
* @param numNodes
240250
* @return
241251
*/
242-
public ArrayList<String> getSlowNodes(int numNodes) {
252+
public List<String> getSlowNodes(int numNodes) {
243253
Collection<ReportForJson> jsonReports = getJsonReports(numNodes);
244254
ArrayList<String> slowNodes = new ArrayList<>();
245255
for (ReportForJson jsonReport : jsonReports) {

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@
9797
import org.apache.hadoop.util.GcTimeMonitor;
9898
import org.apache.hadoop.util.GcTimeMonitor.Builder;
9999
import org.apache.hadoop.tracing.Tracer;
100+
import org.apache.hadoop.util.Timer;
101+
100102
import org.slf4j.Logger;
101103
import org.slf4j.LoggerFactory;
102104

@@ -121,6 +123,8 @@
121123
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT;
122124
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
123125
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_KEY;
126+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT;
127+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY;
124128
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE;
125129
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE_DEFAULT;
126130
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_DEFAULT;
@@ -338,7 +342,8 @@ public enum OperationCategory {
338342
DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY,
339343
DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY,
340344
DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY,
341-
DFS_BLOCK_INVALIDATE_LIMIT_KEY));
345+
DFS_BLOCK_INVALIDATE_LIMIT_KEY,
346+
DFS_DATANODE_PEER_STATS_ENABLED_KEY));
342347

343348
private static final String USAGE = "Usage: hdfs namenode ["
344349
+ StartupOption.BACKUP.getName() + "] | \n\t["
@@ -2204,9 +2209,10 @@ protected String reconfigurePropertyImpl(String property, String newVal)
22042209
return newVal;
22052210
} else if (property.equals(DFS_IMAGE_PARALLEL_LOAD_KEY)) {
22062211
return reconfigureParallelLoad(newVal);
2207-
} else if (property.equals(DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY)
2208-
|| (property.equals(DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY))
2209-
|| (property.equals(DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY))) {
2212+
} else if (property.equals(DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY) || (property.equals(
2213+
DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY)) || (property.equals(
2214+
DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY)) || (property.equals(
2215+
DFS_DATANODE_PEER_STATS_ENABLED_KEY))) {
22102216
return reconfigureSlowNodesParameters(datanodeManager, property, newVal);
22112217
} else if (property.equals(DFS_BLOCK_INVALIDATE_LIMIT_KEY)) {
22122218
return reconfigureBlockInvalidateLimit(datanodeManager, property, newVal);
@@ -2402,27 +2408,48 @@ String reconfigureSlowNodesParameters(final DatanodeManager datanodeManager,
24022408
namesystem.writeLock();
24032409
String result;
24042410
try {
2405-
if (property.equals(DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY)) {
2406-
boolean enable = (newVal == null ? DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_DEFAULT :
2411+
switch (property) {
2412+
case DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_KEY: {
2413+
boolean enable = (newVal == null ?
2414+
DFS_NAMENODE_AVOID_SLOW_DATANODE_FOR_READ_DEFAULT :
24072415
Boolean.parseBoolean(newVal));
24082416
result = Boolean.toString(enable);
24092417
datanodeManager.setAvoidSlowDataNodesForReadEnabled(enable);
2410-
} else if (property.equals(
2411-
DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY)) {
2418+
break;
2419+
}
2420+
case DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_KEY: {
24122421
boolean enable = (newVal == null ?
24132422
DFS_NAMENODE_BLOCKPLACEMENTPOLICY_EXCLUDE_SLOW_NODES_ENABLED_DEFAULT :
24142423
Boolean.parseBoolean(newVal));
24152424
result = Boolean.toString(enable);
24162425
bm.setExcludeSlowNodesEnabled(enable);
2417-
} else if (property.equals(DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY)) {
2426+
break;
2427+
}
2428+
case DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_KEY: {
24182429
int maxSlowpeerCollectNodes = (newVal == null ?
24192430
DFS_NAMENODE_MAX_SLOWPEER_COLLECT_NODES_DEFAULT :
24202431
Integer.parseInt(newVal));
24212432
result = Integer.toString(maxSlowpeerCollectNodes);
24222433
datanodeManager.setMaxSlowpeerCollectNodes(maxSlowpeerCollectNodes);
2423-
} else {
2424-
throw new IllegalArgumentException("Unexpected property " +
2425-
property + " in reconfigureSlowNodesParameters");
2434+
break;
2435+
}
2436+
case DFS_DATANODE_PEER_STATS_ENABLED_KEY: {
2437+
Timer timer = new Timer();
2438+
if (newVal != null && !newVal.equalsIgnoreCase("true") && !newVal.equalsIgnoreCase(
2439+
"false")) {
2440+
throw new IllegalArgumentException(newVal + " is not boolean value");
2441+
}
2442+
final boolean peerStatsEnabled = newVal == null ?
2443+
DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT :
2444+
Boolean.parseBoolean(newVal);
2445+
result = Boolean.toString(peerStatsEnabled);
2446+
datanodeManager.initSlowPeerTracker(getConf(), timer, peerStatsEnabled);
2447+
break;
2448+
}
2449+
default: {
2450+
throw new IllegalArgumentException(
2451+
"Unexpected property " + property + " in reconfigureSlowNodesParameters");
2452+
}
24262453
}
24272454
LOG.info("RECONFIGURE* changed {} to {}", property, newVal);
24282455
return result;

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeReconfigure.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646

4747
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_ENABLED_KEY;
4848
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_ENABLED_DEFAULT;
49+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY;
4950
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
5051
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT;
5152
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY;
@@ -480,6 +481,34 @@ public void testBlockInvalidateLimit() throws ReconfigurationException {
480481
datanodeManager.getBlockInvalidateLimit());
481482
}
482483

484+
@Test
485+
public void testSlowPeerTrackerEnabled() throws Exception {
486+
final NameNode nameNode = cluster.getNameNode();
487+
final DatanodeManager datanodeManager = nameNode.namesystem.getBlockManager()
488+
.getDatanodeManager();
489+
490+
assertFalse("SlowNode tracker is already enabled. It should be disabled by default",
491+
datanodeManager.getSlowPeerTracker().isSlowPeerTrackerEnabled());
492+
493+
try {
494+
nameNode.reconfigurePropertyImpl(DFS_DATANODE_PEER_STATS_ENABLED_KEY, "non-boolean");
495+
fail("should not reach here");
496+
} catch (ReconfigurationException e) {
497+
assertEquals(
498+
"Could not change property dfs.datanode.peer.stats.enabled from 'false' to 'non-boolean'",
499+
e.getMessage());
500+
}
501+
502+
nameNode.reconfigurePropertyImpl(DFS_DATANODE_PEER_STATS_ENABLED_KEY, "True");
503+
assertTrue("SlowNode tracker is still disabled. Reconfiguration could not be successful",
504+
datanodeManager.getSlowPeerTracker().isSlowPeerTrackerEnabled());
505+
506+
nameNode.reconfigurePropertyImpl(DFS_DATANODE_PEER_STATS_ENABLED_KEY, null);
507+
assertFalse("SlowNode tracker is still enabled. Reconfiguration could not be successful",
508+
datanodeManager.getSlowPeerTracker().isSlowPeerTrackerEnabled());
509+
510+
}
511+
483512
@After
484513
public void shutDown() throws IOException {
485514
if (cluster != null) {

0 commit comments

Comments
 (0)