Commit 46e178d

xinglin (Xing Lin) authored and committed
HDFS-17055 Export HAState as a metric from Namenode for monitoring (apache#5764)
1 parent 29362cd commit 46e178d

File tree

7 files changed: +88 -4 lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java

Lines changed: 2 additions & 0 deletions
@@ -48,6 +48,7 @@
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.ipc.StandbyException;
 import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 
@@ -68,6 +69,7 @@
  * </ol>
  */
 @InterfaceAudience.Private
+@Metrics(context="dfs")
 public class BackupNode extends NameNode {
   private static final String BN_ADDRESS_NAME_KEY = DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY;
   private static final String BN_ADDRESS_DEFAULT = DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT;
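
For context (illustrative, not part of the commit): the change relies on the hadoop-metrics2 annotation pattern, where a class marked @Metrics acts as a metrics source and each @Metric getter on it is published as a gauge once an instance is registered with the metrics system. The @Metrics annotation is not inherited, so concrete subclasses of NameNode such as BackupNode here (and the TestNameNode test class further down) appear to need their own annotation for the registration added in the NameNode constructor to succeed. A minimal, self-contained sketch of the pattern; the class name, metric name, and the "ExampleDaemon" prefix are assumptions made up for this example:

// Illustrative sketch only; names below are not from the commit.
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;

@Metrics(context="dfs")
public class ExampleMetricsSource {

  // Published as an integer gauge named "ExampleValue" once the source is registered.
  @Metric({"ExampleValue", "An example gauge"})
  public int getExampleValue() {
    return 1;
  }

  public static void main(String[] args) {
    // Start the metrics system and register this object as a source, mirroring
    // the DefaultMetricsSystem.instance().register(this) call added to NameNode.
    DefaultMetricsSystem.initialize("ExampleDaemon");
    DefaultMetricsSystem.instance().register(new ExampleMetricsSource());
  }
}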

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

Lines changed: 25 additions & 0 deletions
@@ -78,6 +78,8 @@
 import org.apache.hadoop.ipc.RetriableException;
 import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.ipc.StandbyException;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.util.MBeans;
 import org.apache.hadoop.net.NetUtils;
@@ -246,6 +248,7 @@
  * NameNode state, for example partial blocksMap etc.
  **********************************************************/
 @InterfaceAudience.Private
+@Metrics(context="dfs")
 public class NameNode extends ReconfigurableBase implements
     NameNodeStatusMXBean, TokenVerifier<DelegationTokenIdentifier> {
   static{
@@ -1049,6 +1052,7 @@ protected NameNode(Configuration conf, NamenodeRole role)
         DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE,
         DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE_DEFAULT);
     this.started.set(true);
+    DefaultMetricsSystem.instance().register(this);
   }
 
   private void stopAtException(Exception e){
@@ -1119,6 +1123,7 @@ public void stop() {
         levelDBAliasMapServer.close();
       }
     }
+    started.set(false);
     tracer.close();
   }
 
@@ -1951,6 +1956,26 @@ synchronized HAServiceState getServiceState() {
     return state.getServiceState();
   }
 
+  /**
+   * Emit Namenode HA service state as an integer so that one can monitor NN HA
+   * state based on this metric.
+   *
+   * @return 0 when not fully started
+   *         1 for active or standalone (non-HA) NN
+   *         2 for standby
+   *         3 for observer
+   *
+   * These are the same integer values for the HAServiceState enum.
+   */
+  @Metric({"NameNodeState", "Namenode HA service state"})
+  public int getNameNodeState() {
+    if (!isStarted() || state == null) {
+      return HAServiceState.INITIALIZING.ordinal();
+    }
+
+    return state.getServiceState().ordinal();
+  }
+
   /**
    * Register NameNodeStatusMXBean
    */
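
A note on consuming the new gauge (illustrative, not part of the commit): because the value is defined as HAServiceState.ordinal(), a monitoring client can translate it back into the enum constant. A minimal sketch, assuming only that HAServiceState keeps its declared order (INITIALIZING, ACTIVE, STANDBY, OBSERVER); the decoder class is a hypothetical helper:

// Illustrative helper, not part of this commit.
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;

public class NameNodeStateDecoder {

  /** Translate the NameNodeState gauge value back into the HAServiceState constant. */
  public static HAServiceState decode(int value) {
    HAServiceState[] states = HAServiceState.values();
    if (value < 0 || value >= states.length) {
      throw new IllegalArgumentException("Unexpected NameNodeState value: " + value);
    }
    return states[value];
  }

  public static void main(String[] args) {
    // Prints 0 -> INITIALIZING, 1 -> ACTIVE, 2 -> STANDBY, 3 -> OBSERVER
    for (HAServiceState s : HAServiceState.values()) {
      System.out.println(s.ordinal() + " -> " + decode(s.ordinal()));
    }
  }
}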

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSFinalize.java

Lines changed: 1 addition & 1 deletion
@@ -154,7 +154,7 @@ public void testFinalize() throws Exception {
       UpgradeUtilities.createEmptyDirs(dataNodeDirs);
 
       log("Finalize NN & BP with existing previous dir", numDirs);
-      String bpid = UpgradeUtilities.getCurrentBlockPoolID(cluster);
+      String bpid = UpgradeUtilities.getCurrentBlockPoolID(null);
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
       UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java

Lines changed: 1 addition & 1 deletion
@@ -327,7 +327,7 @@ public void testRollback() throws Exception {
           UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);
 
       UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs,
-          storageInfo, UpgradeUtilities.getCurrentBlockPoolID(cluster));
+          storageInfo, UpgradeUtilities.getCurrentBlockPoolID(null));
       startNameNodeShouldFail("Cannot rollback to storage version 1 using this version");
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
     } // end numDir loop

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java

Lines changed: 2 additions & 2 deletions
@@ -348,7 +348,7 @@ public void testUpgrade() throws Exception {
           UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);
 
       UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs, storageInfo,
-          UpgradeUtilities.getCurrentBlockPoolID(cluster));
+          UpgradeUtilities.getCurrentBlockPoolID(null));
 
       startNameNodeShouldFail(StartupOption.UPGRADE);
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
@@ -361,7 +361,7 @@
           UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);
 
       UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs, storageInfo,
-          UpgradeUtilities.getCurrentBlockPoolID(cluster));
+          UpgradeUtilities.getCurrentBlockPoolID(null));
 
       startNameNodeShouldFail(StartupOption.UPGRADE);
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMetricsLogger.java

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,7 @@
 import java.util.function.Supplier;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.impl.Log4JLogger;
+import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -128,6 +129,7 @@ private void addAppender(Log log, Appender appender) {
   /**
    * A NameNode that stubs out the NameSystem for testing.
    */
+  @Metrics(context="dfs")
   private static class TestNameNode extends NameNode {
     @Override
     protected void loadNamesystem(Configuration conf) throws IOException {

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java

Lines changed: 55 additions & 0 deletions
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.ha;
 
+import java.io.IOException;
+import org.apache.hadoop.ha.HAServiceProtocol;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -29,6 +31,7 @@
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.io.IOUtils;
 import org.junit.Test;
 
@@ -176,4 +179,56 @@ public void testHAInodeCount() throws Exception {
     }
 
   }
+
+  /**
+   * Test the getNameNodeState() API added to NameNode.java.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testGetNameNodeState() throws IOException {
+    Configuration conf = new Configuration();
+    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, Integer.MAX_VALUE);
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(
+        MiniDFSNNTopology.simpleHATopology(3)).numDataNodes(1).build();
+
+    cluster.waitActive();
+
+    NameNode nn0 = cluster.getNameNode(0);
+    NameNode nn1 = cluster.getNameNode(1);
+    NameNode nn2 = cluster.getNameNode(2);
+
+    // All namenodes are in standby by default
+    assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
+        nn0.getNameNodeState());
+    assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
+        nn1.getNameNodeState());
+    assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
+        nn2.getNameNodeState());
+
+    // Transition nn0 to be active
+    cluster.transitionToActive(0);
+    assertEquals(HAServiceProtocol.HAServiceState.ACTIVE.ordinal(),
+        nn0.getNameNodeState());
+
+    // Transition nn1 to be active
+    cluster.transitionToStandby(0);
+    cluster.transitionToActive(1);
+    assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
+        nn0.getNameNodeState());
+    assertEquals(HAServiceProtocol.HAServiceState.ACTIVE.ordinal(),
+        nn1.getNameNodeState());
+
+    // Transition nn2 to observer
+    cluster.transitionToObserver(2);
+    assertEquals(HAServiceProtocol.HAServiceState.OBSERVER.ordinal(),
+        nn2.getNameNodeState());
+
+    // Shutdown nn2. Now getNameNodeState should return the INITIALIZING state.
+    cluster.shutdownNameNode(2);
+    assertEquals(HAServiceProtocol.HAServiceState.INITIALIZING.ordinal(),
+        nn2.getNameNodeState());
+  }
 }
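
Beyond the unit test, the practical consumer of this gauge is an external monitoring system. A hedged sketch of how one might confirm the metric is exposed, assuming a NameNode reachable on the default HTTP port 9870 and relying only on the standard /jmx JSON servlet; the exact MBean the metrics system files this attribute under is not spelled out in the commit, so the probe simply scans the full JSON dump for the "NameNodeState" attribute name:

// Illustrative monitoring probe, not part of this commit; host and port are assumptions.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class CheckNameNodeState {
  public static void main(String[] args) throws Exception {
    String host = args.length > 0 ? args[0] : "localhost";
    HttpRequest request = HttpRequest.newBuilder(
        URI.create("http://" + host + ":9870/jmx")).GET().build();
    String body = HttpClient.newHttpClient()
        .send(request, HttpResponse.BodyHandlers.ofString()).body();
    // A real monitor would parse the JSON properly; here we only show whether the
    // attribute is present and print the surrounding snippet.
    int idx = body.indexOf("\"NameNodeState\"");
    System.out.println(idx >= 0
        ? body.substring(idx, Math.min(body.length(), idx + 40))
        : "NameNodeState attribute not found");
  }
}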
