Skip to content

Commit 69d17ec

Browse files
committed
HDFS-16547. [SBN read] Namenode in safe mode should not be transfer to observer state
1 parent c71a68c commit 69d17ec

File tree

7 files changed

+67
-9
lines changed

7 files changed

+67
-9
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2009,6 +2009,9 @@ synchronized void transitionToStandby() throws IOException {
20092009
synchronized void transitionToObserver() throws IOException {
20102010
String operationName = "transitionToObserver";
20112011
namesystem.checkSuperuserPrivilege(operationName);
2012+
if (notBecomeActiveInSafemode && isInSafeMode()) {
2013+
throw new ServiceFailedException(getRole() + " still not leave safemode");
2014+
}
20122015
if (!haEnabled) {
20132016
throw new ServiceFailedException("HA for namenode is not enabled");
20142017
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ private boolean checkSupportObserver(HAServiceTarget target) {
247247
}
248248

249249
private int transitionToObserver(final CommandLine cmd)
250-
throws IOException, ServiceFailedException {
250+
throws IOException {
251251
String[] argv = cmd.getArgs();
252252
if (argv.length != 1) {
253253
errOut.println("transitionToObserver: incorrect number of arguments");
@@ -262,8 +262,13 @@ private int transitionToObserver(final CommandLine cmd)
262262
if (!checkManualStateManagementOK(target)) {
263263
return -1;
264264
}
265-
HAServiceProtocol proto = target.getProxy(getConf(), 0);
266-
HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo());
265+
try {
266+
HAServiceProtocol proto = target.getProxy(getConf(), 0);
267+
HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo());
268+
} catch (ServiceFailedException e) {
269+
errOut.println("transitionToObserver failed! " + e.getLocalizedMessage());
270+
return -1;
271+
}
267272
return 0;
268273
}
269274

hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3725,7 +3725,7 @@
37253725
<name>dfs.ha.nn.not-become-active-in-safemode</name>
37263726
<value>false</value>
37273727
<description>
3728-
This will prevent safe mode namenodes to become active while other standby
3728+
This prevents namenodes in safe mode from becoming active or observer while other standby
37293729
namenodes might be ready to serve requests when it is set to true.
37303730
</description>
37313731
</property>

hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,12 +316,14 @@ The order in which you set these configurations is unimportant, but the values y
316316
<value>hdfs://mycluster</value>
317317
</property>
318318

319-
* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active
319+
* **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent namenodes in safe mode from becoming active or observer
320320

321321
Whether to prevent a namenode from becoming active while it is in safemode. When it is
322322
set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
323323
auto failover is on, or will throw exception to fail the transition to
324-
active if auto failover is off. For example:
324+
active if auto failover is off. Likewise, when this configuration is set to true,
325+
an attempt to transition a namenode to observer state while it is in safemode makes the namenode throw an exception
326+
to fail the transition to observer. For example:
325327

326328
<property>
327329
<name>dfs.ha.nn.not-become-active-in-safemode</name>

hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,12 +376,14 @@ The order in which you set these configurations is unimportant, but the values y
376376
<value>/path/to/journal/node/local/data</value>
377377
</property>
378378

379-
* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active
379+
* **dfs.ha.nn.not-become-active-in-safemode** - whether to prevent namenodes in safe mode from becoming active or observer
380380

381381
Whether to prevent a namenode from becoming active while it is in safemode. When it is
382382
set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if
383383
auto failover is on, or will throw exception to fail the transition to
384-
active if auto failover is off. For example:
384+
active if auto failover is off. Likewise, when this configuration is set to true,
385+
an attempt to transition a namenode to observer state while it is in safemode makes the namenode throw an exception
386+
to fail the transition to observer. For example:
385387

386388
<property>
387389
<name>dfs.ha.nn.not-become-active-in-safemode</name>

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,4 +977,26 @@ public void testTransitionToActiveWhenSafeMode() throws Exception {
977977
() -> miniCluster.transitionToActive(0));
978978
}
979979
}
980+
981+
@Test
982+
public void testTransitionToObserverWhenSafeMode() throws Exception {
983+
Configuration config = new Configuration();
984+
config.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
985+
try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(config,
986+
new File(GenericTestUtils.getRandomizedTempPath()))
987+
.nnTopology(MiniDFSNNTopology.simpleHATopology())
988+
.numDataNodes(1)
989+
.build()) {
990+
miniCluster.waitActive();
991+
miniCluster.transitionToStandby(0);
992+
miniCluster.transitionToStandby(1);
993+
NameNode namenode0 = miniCluster.getNameNode(0);
994+
NameNode namenode1 = miniCluster.getNameNode(1);
995+
NameNodeAdapter.enterSafeMode(namenode0, false);
996+
NameNodeAdapter.enterSafeMode(namenode1, false);
997+
LambdaTestUtils.intercept(ServiceFailedException.class,
998+
"NameNode still not leave safemode",
999+
() -> miniCluster.transitionToObserver(0));
1000+
}
1001+
}
9801002
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717
*/
1818
package org.apache.hadoop.hdfs.tools;
1919

20+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE;
2021
import static org.junit.Assert.assertEquals;
2122
import static org.junit.Assert.assertFalse;
2223
import static org.junit.Assert.assertTrue;
2324

25+
import java.io.ByteArrayInputStream;
2426
import java.io.ByteArrayOutputStream;
2527
import java.io.File;
2628
import java.io.IOException;
@@ -70,6 +72,7 @@ public class TestDFSHAAdminMiniCluster {
7072
@Before
7173
public void setup() throws IOException {
7274
conf = new Configuration();
75+
conf.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true);
7376
cluster = new MiniDFSCluster.Builder(conf)
7477
.nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0)
7578
.build();
@@ -161,7 +164,28 @@ public void testObserverIllegalTransition() throws Exception {
161164
assertEquals(-1, runTool("-transitionToActive", "nn1"));
162165
assertFalse(nnode1.isActiveState());
163166
}
164-
167+
168+
/**
169+
* Tests that a NameNode in safe mode cannot be transitioned to the observer state.
170+
*/
171+
@Test
172+
public void testObserverTransitionInSafeMode() throws Exception {
173+
NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
174+
DFSHAAdmin admin = new DFSHAAdmin();
175+
admin.setConf(conf);
176+
System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
177+
int result = admin.run(
178+
new String[]{"-transitionToObserver", "-forcemanual", "nn1"});
179+
assertEquals("State transition returned: " + result, -1, result);
180+
181+
NameNodeAdapter.leaveSafeMode(cluster.getNameNode(0));
182+
System.setIn(new ByteArrayInputStream("yes\n".getBytes()));
183+
int result1 = admin.run(
184+
new String[]{"-transitionToObserver", "-forcemanual", "nn1"});
185+
assertEquals("State transition returned: " + result1, 0, result1);
186+
assertFalse(cluster.getNameNode(0).isInSafeMode());
187+
}
188+
165189
@Test
166190
public void testTryFailoverToSafeMode() throws Exception {
167191
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,

0 commit comments

Comments
 (0)