Skip to content

Commit 349acbd

Browse files
committed
Backported feature HDFS-16547 to disallow observer transition if the namenode is in safemode
1 parent e8f87f7 commit 349acbd

File tree

7 files changed

+67
-9
lines changed

7 files changed

+67
-9
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1903,6 +1903,9 @@ synchronized void transitionToStandby()
19031903
synchronized void transitionToObserver()
19041904
throws ServiceFailedException, AccessControlException {
19051905
namesystem.checkSuperuserPrivilege();
1906+
if (notBecomeActiveInSafemode && isInSafeMode()) {
1907+
throw new ServiceFailedException(getRole() + " still not leave safemode");
1908+
}
19061909
if (!haEnabled) {
19071910
throw new ServiceFailedException("HA for namenode is not enabled");
19081911
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ private boolean checkSupportObserver(HAServiceTarget target) {
247247
}
248248

249249
private int transitionToObserver(final CommandLine cmd)
250-
throws IOException, ServiceFailedException {
250+
throws IOException {
251251
String[] argv = cmd.getArgs();
252252
if (argv.length != 1) {
253253
errOut.println("transitionToObserver: incorrect number of arguments");
@@ -262,8 +262,13 @@ private int transitionToObserver(final CommandLine cmd)
262262
if (!checkManualStateManagementOK(target)) {
263263
return -1;
264264
}
265-
HAServiceProtocol proto = target.getProxy(getConf(), 0);
266-
HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo());
265+
try {
266+
HAServiceProtocol proto = target.getProxy(getConf(), 0);
267+
HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo());
268+
} catch (ServiceFailedException e) {
269+
errOut.println("transitionToObserver failed! " + e.getLocalizedMessage());
270+
return -1;
271+
}
267272
return 0;
268273
}
269274

hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3692,7 +3692,7 @@
36923692
<name>dfs.ha.nn.not-become-active-in-safemode</name>
36933693
<value>false</value>
36943694
<description>
3695-
This will prevent safe mode namenodes to become active while other standby
3695+
This will prevent safe mode namenodes from becoming active or observer while other standby
36963696
namenodes might be ready to serve requests when it is set to true.
36973697
</description>
36983698
</property>

hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,12 +296,14 @@ The order in which you set these configurations is unimportant, but the values y
296296
<value>hdfs://mycluster</value>
297297
</property>
298298

299-
* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active
299+
* **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent safe mode namenodes from becoming active or observer
300300

301301
Whether allow namenode to become active when it is in safemode, when it is
302302
set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
303303
auto failover is on, or will throw exception to fail the transition to
304-
active if auto failover is off. For example:
304+
active if auto failover is off. Likewise, if this configuration is set to true
305+
and a namenode in safemode is asked to transition to observer state, the namenode will throw an exception
306+
to fail the transition to observer. For example:
305307

306308
<property>
307309
<name>dfs.ha.nn.not-become-active-in-safemode</name>

hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,12 +347,14 @@ The order in which you set these configurations is unimportant, but the values y
347347
<value>/path/to/journal/node/local/data</value>
348348
</property>
349349

350-
* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active
350+
* **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent safe mode namenodes from becoming active or observer
351351

352352
Whether allow namenode to become active when it is in safemode, when it is
353353
set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
354354
auto failover is on, or will throw exception to fail the transition to
355-
active if auto failover is off. For example:
355+
active if auto failover is off. Likewise, if this configuration is set to true
356+
and a namenode in safemode is asked to transition to observer state, the namenode will throw an exception
357+
to fail the transition to observer. For example:
356358

357359
<property>
358360
<name>dfs.ha.nn.not-become-active-in-safemode</name>

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -939,4 +939,26 @@ public void testTransitionToActiveWhenSafeMode() throws Exception {
939939
() -> miniCluster.transitionToActive(0));
940940
}
941941
}
942+
943+
@Test
944+
public void testTransitionToObserverWhenSafeMode() throws Exception {
945+
Configuration config = new Configuration();
946+
config.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
947+
try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(config,
948+
new File(GenericTestUtils.getRandomizedTempPath()))
949+
.nnTopology(MiniDFSNNTopology.simpleHATopology())
950+
.numDataNodes(1)
951+
.build()) {
952+
miniCluster.waitActive();
953+
miniCluster.transitionToStandby(0);
954+
miniCluster.transitionToStandby(1);
955+
NameNode namenode0 = miniCluster.getNameNode(0);
956+
NameNode namenode1 = miniCluster.getNameNode(1);
957+
NameNodeAdapter.enterSafeMode(namenode0, false);
958+
NameNodeAdapter.enterSafeMode(namenode1, false);
959+
LambdaTestUtils.intercept(ServiceFailedException.class,
960+
"NameNode still not leave safemode",
961+
() -> miniCluster.transitionToObserver(0));
962+
}
963+
}
942964
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717
*/
1818
package org.apache.hadoop.hdfs.tools;
1919

20+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE;
2021
import static org.junit.Assert.assertEquals;
2122
import static org.junit.Assert.assertFalse;
2223
import static org.junit.Assert.assertTrue;
2324

25+
import java.io.ByteArrayInputStream;
2426
import java.io.ByteArrayOutputStream;
2527
import java.io.File;
2628
import java.io.IOException;
@@ -70,6 +72,7 @@ public class TestDFSHAAdminMiniCluster {
7072
@Before
7173
public void setup() throws IOException {
7274
conf = new Configuration();
75+
conf.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
7376
cluster = new MiniDFSCluster.Builder(conf)
7477
.nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0)
7578
.build();
@@ -161,7 +164,28 @@ public void testObserverIllegalTransition() throws Exception {
161164
assertEquals(-1, runTool("-transitionToActive", "nn1"));
162165
assertFalse(nnode1.isActiveState());
163166
}
164-
167+
168+
/**
169+
* Tests that a NameNode in safe mode cannot be transitioned to observer.
170+
*/
171+
@Test
172+
public void testObserverTransitionInSafeMode() throws Exception {
173+
NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
174+
DFSHAAdmin admin = new DFSHAAdmin();
175+
admin.setConf(conf);
176+
System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
177+
int result = admin.run(
178+
new String[]{"-transitionToObserver", "-forcemanual", "nn1"});
179+
assertEquals("State transition returned: " + result, -1, result);
180+
181+
NameNodeAdapter.leaveSafeMode(cluster.getNameNode(0));
182+
System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
183+
int result1 = admin.run(
184+
new String[]{"-transitionToObserver", "-forcemanual", "nn1"});
185+
assertEquals("State transition returned: " + result1, 0, result1);
186+
assertFalse(cluster.getNameNode(0).isInSafeMode());
187+
}
188+
165189
@Test
166190
public void testTryFailoverToSafeMode() throws Exception {
167191
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,

0 commit comments

Comments
 (0)