Skip to content

Commit 72216c2

Browse files
顾鹏顾鹏
authored andcommitted
HDFS-17223. Add journalnode maintenance node list
1 parent 000a39b commit 72216c2

File tree

9 files changed

+169
-11
lines changed

9 files changed

+169
-11
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,6 +1466,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
14661466
"dfs.journalnode.edit-cache-size.fraction";
14671467
public static final float DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_DEFAULT = 0.5f;
14681468

1469+
public static final String DFS_JOURNALNODE_MAINTENANCE_NODES_KEY =
1470+
"dfs.journalnode.maintenance.nodes";
1471+
public static final String[] DFS_JOURNALNODE_MAINTENANCE_NODES_DEFAULT = {};
1472+
14691473
// Journal-node related configs for the client side.
14701474
public static final String DFS_QJOURNAL_QUEUE_SIZE_LIMIT_KEY = "dfs.qjournal.queued-edits.limit.mb";
14711475
public static final int DFS_QJOURNAL_QUEUE_SIZE_LIMIT_DEFAULT = 10;

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
import org.apache.hadoop.classification.InterfaceStability;
7171
import org.apache.hadoop.fs.ParentNotDirectoryException;
7272
import org.apache.hadoop.fs.UnresolvedLinkException;
73+
import org.apache.hadoop.hdfs.server.blockmanagement.HostSet;
7374
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
7475
import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
7576
import org.apache.hadoop.hdfs.server.namenode.INodesInPath;
@@ -1982,4 +1983,32 @@ public static void addTransferRateMetric(final DataNodeMetrics metrics, final lo
19821983
LOG.warn("Unexpected value for data transfer bytes={} duration={}", read, duration);
19831984
}
19841985
}
1986+
1987+
/**
1988+
* Construct a HostSet from an array of "ip:port" strings.
1989+
* @param nodesHostPort ip port string array.
1990+
* @return HostSet of InetSocketAddress.
1991+
*/
1992+
public static HostSet getHostSet(String[] nodesHostPort) {
1993+
HostSet retSet = new HostSet();
1994+
for (String hostPort : nodesHostPort) {
1995+
try {
1996+
URI uri = new URI("dummy", hostPort, null, null, null);
1997+
if (uri.getPort() == -1 || uri.getPort() == 0) {
1998+
LOG.warn(String.format("The ip:port `%s` is invalid, skip this node.", hostPort));
1999+
continue;
2000+
}
2001+
int port = uri.getPort();
2002+
InetSocketAddress inetSocketAddress = new InetSocketAddress(uri.getHost(), port);
2003+
if (inetSocketAddress.isUnresolved()) {
2004+
LOG.warn(String.format("Failed to resolve address `%s`", hostPort));
2005+
continue;
2006+
}
2007+
retSet.add(inetSocketAddress);
2008+
} catch (URISyntaxException e) {
2009+
LOG.warn(String.format("Failed to parse `%s`", hostPort));
2010+
}
2011+
}
2012+
return retSet;
2013+
}
19852014
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,15 @@ class AsyncLoggerSet {
5353

5454
private static final long INVALID_EPOCH = -1;
5555
private long myEpoch = INVALID_EPOCH;
56+
private final int majoritySize;
5657

57-
public AsyncLoggerSet(List<AsyncLogger> loggers) {
58+
/**
 * Creates a logger set that uses the number of supplied loggers as its
 * quorum journal count, by delegating to the two-argument constructor.
 *
 * @param loggers the loggers that make up this set.
 */
AsyncLoggerSet(List<AsyncLogger> loggers) {
59+
this(loggers, loggers.size());
60+
}
61+
62+
/**
 * Creates a logger set whose majority size is computed from an explicitly
 * supplied journal count instead of from the number of loggers.
 *
 * @param loggers the loggers that make up this set; an immutable copy is kept.
 * @param quorumJournalCount the journal count the majority is derived from.
 */
AsyncLoggerSet(List<AsyncLogger> loggers, int quorumJournalCount) {
5863
this.loggers = ImmutableList.copyOf(loggers);
64+
// Majority is fixed at construction time: floor(count / 2) + 1.
this.majoritySize = quorumJournalCount / 2 + 1;
5965
}
6066

6167
void setEpoch(long e) {
@@ -151,7 +157,7 @@ <V> Map<AsyncLogger, V> waitForWriteQuorum(QuorumCall<AsyncLogger, V> q,
151157
* @return the number of nodes which are required to obtain a quorum.
152158
*/
153159
int getMajoritySize() {
154-
return loggers.size() / 2 + 1;
160+
return this.majoritySize;
155161
}
156162

157163
/**

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
*/
1818
package org.apache.hadoop.hdfs.qjournal.client;
1919

20+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_JOURNALNODE_MAINTENANCE_NODES_DEFAULT;
21+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_JOURNALNODE_MAINTENANCE_NODES_KEY;
22+
2023
import java.io.IOException;
2124
import java.net.InetSocketAddress;
2225
import java.net.URI;
@@ -31,6 +34,7 @@
3134
import java.util.concurrent.TimeUnit;
3235
import java.util.concurrent.TimeoutException;
3336

37+
import org.apache.hadoop.hdfs.server.blockmanagement.HostSet;
3438
import org.apache.hadoop.util.Lists;
3539
import org.slf4j.Logger;
3640
import org.slf4j.LoggerFactory;
@@ -108,6 +112,7 @@ public class QuorumJournalManager implements JournalManager {
108112
private static final int OUTPUT_BUFFER_CAPACITY_DEFAULT = 512 * 1024;
109113
private int outputBufferCapacity;
110114
private final URLConnectionFactory connectionFactory;
115+
private int quorumJournalCount;
111116

112117
/** Limit logging about input stream selection to every 5 seconds max. */
113118
private static final long SELECT_INPUT_STREAM_LOG_INTERVAL_MS = 5000;
@@ -144,7 +149,14 @@ public QuorumJournalManager(Configuration conf,
144149
this.uri = uri;
145150
this.nsInfo = nsInfo;
146151
this.nameServiceId = nameServiceId;
147-
this.loggers = new AsyncLoggerSet(createLoggers(loggerFactory));
152+
this.loggers = new AsyncLoggerSet(createLoggers(loggerFactory), this.quorumJournalCount);
153+
154+
// Check whether the number of jn maintenance lists is valid
155+
int quorumThreshold = quorumJournalCount / 2 + 1;
156+
Preconditions.checkArgument(
157+
this.loggers.size() >= quorumThreshold,
158+
"The total journalnode minus %s the number of blacklists must be greater than or equal to"
159+
+ " %s!", DFS_JOURNALNODE_MAINTENANCE_NODES_KEY, quorumThreshold);
148160

149161
this.maxTxnsPerRpc =
150162
conf.getInt(QJM_RPC_MAX_TXNS_KEY, QJM_RPC_MAX_TXNS_DEFAULT);
@@ -250,6 +262,9 @@ Map<AsyncLogger, NewEpochResponseProto> createNewUniqueEpoch()
250262

251263
@Override
252264
public void format(NamespaceInfo nsInfo, boolean force) throws IOException {
265+
if (isEnableJnMaintenance()) {
266+
throw new IOException("format() does not support enabling jn maintenance mode");
267+
}
253268
QuorumCall<AsyncLogger, Void> call = loggers.format(nsInfo, force);
254269
try {
255270
call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -406,21 +421,39 @@ private void recoverUnclosedSegment(long segmentTxId) throws IOException {
406421
logToSync.getStartTxId(),
407422
logToSync.getEndTxId()));
408423
}
409-
410-
static List<AsyncLogger> createLoggers(Configuration conf,
424+
425+
/**
 * Creates the loggers for the given journal URI, reading the list of
 * maintenance nodes to skip from the configuration.
 *
 * @param conf configuration that supplies the maintenance node list.
 * @param uri quorum journal URI.
 * @param nsInfo namespace info passed through to the logger factory.
 * @param factory factory used to create each logger.
 * @param nameServiceId name service id passed through to the logger factory.
 * @return the created loggers.
 * @throws IOException propagated from the overload this method delegates to.
 */
List<AsyncLogger> createLoggers(Configuration conf,
426+
URI uri,
427+
NamespaceInfo nsInfo,
428+
AsyncLogger.Factory factory,
429+
String nameServiceId)
430+
throws IOException {
431+
// Falls back to the default (an empty array) when the key is not set.
String[] skipNodesHostPort = conf.getTrimmedStrings(
432+
DFS_JOURNALNODE_MAINTENANCE_NODES_KEY, DFS_JOURNALNODE_MAINTENANCE_NODES_DEFAULT);
433+
return createLoggers(conf, uri, nsInfo, factory, nameServiceId, skipNodesHostPort);
434+
}
435+
436+
private List<AsyncLogger> createLoggers(Configuration conf,
411437
URI uri,
412438
NamespaceInfo nsInfo,
413439
AsyncLogger.Factory factory,
414-
String nameServiceId)
440+
String nameServiceId,
441+
String[] skipNodesHostPort)
415442
throws IOException {
416443
List<AsyncLogger> ret = Lists.newArrayList();
417444
List<InetSocketAddress> addrs = Util.getAddressesList(uri, conf);
418445
if (addrs.size() % 2 == 0) {
419446
LOG.warn("Quorum journal URI '" + uri + "' has an even number " +
420447
"of Journal Nodes specified. This is not recommended!");
421448
}
449+
setQuorumJournalCount(addrs.size());
450+
HostSet skipSet = DFSUtil.getHostSet(skipNodesHostPort);
422451
String jid = parseJournalId(uri);
423452
for (InetSocketAddress addr : addrs) {
453+
if(skipSet.match(addr)) {
454+
LOG.info("The node {} is a maintenance node and will skip initialization.", addr);
455+
continue;
456+
}
424457
ret.add(factory.createLogger(conf, nsInfo, jid, nameServiceId, addr));
425458
}
426459
return ret;
@@ -667,6 +700,9 @@ AsyncLoggerSet getLoggerSetForTests() {
667700

668701
@Override
669702
public void doPreUpgrade() throws IOException {
703+
if (isEnableJnMaintenance()) {
704+
throw new IOException("doPreUpgrade() does not support enabling jn maintenance mode");
705+
}
670706
QuorumCall<AsyncLogger, Void> call = loggers.doPreUpgrade();
671707
try {
672708
call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -684,6 +720,9 @@ public void doPreUpgrade() throws IOException {
684720

685721
@Override
686722
public void doUpgrade(Storage storage) throws IOException {
723+
if (isEnableJnMaintenance()) {
724+
throw new IOException("doUpgrade() does not support enabling jn maintenance mode");
725+
}
687726
QuorumCall<AsyncLogger, Void> call = loggers.doUpgrade(storage);
688727
try {
689728
call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -701,6 +740,9 @@ public void doUpgrade(Storage storage) throws IOException {
701740

702741
@Override
703742
public void doFinalize() throws IOException {
743+
if (isEnableJnMaintenance()) {
744+
throw new IOException("doFinalize() does not support enabling jn maintenance mode");
745+
}
704746
QuorumCall<AsyncLogger, Void> call = loggers.doFinalize();
705747
try {
706748
call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -719,6 +761,9 @@ public void doFinalize() throws IOException {
719761
@Override
720762
public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
721763
int targetLayoutVersion) throws IOException {
764+
if (isEnableJnMaintenance()) {
765+
throw new IOException("canRollBack() does not support enabling jn maintenance mode");
766+
}
722767
QuorumCall<AsyncLogger, Boolean> call = loggers.canRollBack(storage,
723768
prevStorage, targetLayoutVersion);
724769
try {
@@ -753,6 +798,9 @@ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
753798

754799
@Override
755800
public void doRollback() throws IOException {
801+
if (isEnableJnMaintenance()) {
802+
throw new IOException("doRollback() does not support enabling jn maintenance mode");
803+
}
756804
QuorumCall<AsyncLogger, Void> call = loggers.doRollback();
757805
try {
758806
call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -770,6 +818,9 @@ public void doRollback() throws IOException {
770818

771819
@Override
772820
public void discardSegments(long startTxId) throws IOException {
821+
if (isEnableJnMaintenance()) {
822+
throw new IOException("discardSegments() does not support enabling jn maintenance mode");
823+
}
773824
QuorumCall<AsyncLogger, Void> call = loggers.discardSegments(startTxId);
774825
try {
775826
call.waitFor(loggers.size(), loggers.size(), 0,
@@ -789,6 +840,9 @@ public void discardSegments(long startTxId) throws IOException {
789840

790841
@Override
791842
public long getJournalCTime() throws IOException {
843+
if (isEnableJnMaintenance()) {
844+
throw new IOException("getJournalCTime() does not support enabling jn maintenance mode");
845+
}
792846
QuorumCall<AsyncLogger, Long> call = loggers.getJournalCTime();
793847
try {
794848
call.waitFor(loggers.size(), loggers.size(), 0,
@@ -819,4 +873,12 @@ public long getJournalCTime() throws IOException {
819873

820874
throw new AssertionError("Unreachable code.");
821875
}
876+
877+
/**
 * Records the journal count that this manager compares against the number
 * of created loggers.
 *
 * @param quorumJournalCount the journal count to store.
 */
public void setQuorumJournalCount(int quorumJournalCount) {
878+
this.quorumJournalCount = quorumJournalCount;
879+
}
880+
881+
/**
 * Reports whether fewer loggers exist than the recorded quorum journal count.
 *
 * @return true if the logger set is smaller than the quorum journal count.
 */
private boolean isEnableJnMaintenance() {
882+
return this.loggers.size() < quorumJournalCount;
883+
}
822884
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public class HostSet implements Iterable<InetSocketAddress> {
4545
* The function that checks whether there exists an entry foo in the set
4646
* so that foo &lt;= addr.
4747
*/
48-
boolean matchedBy(InetSocketAddress addr) {
48+
public boolean matchedBy(InetSocketAddress addr) {
4949
Collection<Integer> ports = addrs.get(addr.getAddress());
5050
return addr.getPort() == 0 ? !ports.isEmpty() : ports.contains(addr
5151
.getPort());
@@ -55,23 +55,23 @@ boolean matchedBy(InetSocketAddress addr) {
5555
* The function that checks whether there exists an entry foo in the set
5656
* so that addr &lt;= foo.
5757
*/
58-
boolean match(InetSocketAddress addr) {
58+
public boolean match(InetSocketAddress addr) {
5959
int port = addr.getPort();
6060
Collection<Integer> ports = addrs.get(addr.getAddress());
6161
boolean exactMatch = ports.contains(port);
6262
boolean genericMatch = ports.contains(0);
6363
return exactMatch || genericMatch;
6464
}
6565

66-
boolean isEmpty() {
66+
public boolean isEmpty() {
6767
return addrs.isEmpty();
6868
}
6969

70-
int size() {
70+
public int size() {
7171
return addrs.size();
7272
}
7373

74-
void add(InetSocketAddress addr) {
74+
public void add(InetSocketAddress addr) {
7575
Preconditions.checkArgument(!addr.isUnresolved());
7676
addrs.put(addr.getAddress(), addr.getPort());
7777
}

hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6333,6 +6333,20 @@
63336333
</description>
63346334
</property>
63356335

6336+
<property>
6337+
<name>dfs.journalnode.maintenance.nodes</name>
6338+
<value></value>
6339+
<description>
6340+
In the case of one out of three journal nodes being down, theoretically the service can still
6341+
continue. However, in reality, the downed node may not recover quickly. If the Namenode needs
6342+
to be restarted, it will try the downed journal node through the lengthy RPC retry mechanism,
6343+
resulting in a long initialization time for the Namenode to provide services. By adding the
6344+
downed journal node to the maintenance nodes, the initialization time of the Namenode in such
6345+
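scenarios can be accelerated. The value is a comma-separated list of journal node host:port
entries; entries without a port, or whose host cannot be resolved, are ignored. Maintenance
nodes still count toward the total used to compute the quorum size, so the remaining nodes
must still form a majority. While any node is in maintenance, the format, upgrade, rollback
and finalize operations are refused.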
6346+
</description>
6347+
</property>
6348+
6349+
63366350
<property>
63376351
<name>dfs.namenode.lease-hard-limit-sec</name>
63386352
<value>1200</value>

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import static org.junit.Assert.assertArrayEquals;
4242
import static org.junit.Assert.assertEquals;
4343
import static org.junit.Assert.assertFalse;
44+
import static org.junit.Assert.assertNotNull;
4445
import static org.junit.Assert.assertNull;
4546
import static org.junit.Assert.assertThat;
4647
import static org.junit.Assert.assertTrue;
@@ -72,6 +73,7 @@
7273
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
7374
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
7475
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
76+
import org.apache.hadoop.hdfs.server.blockmanagement.HostSet;
7577
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
7678
import org.apache.hadoop.hdfs.server.namenode.NameNode;
7779
import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
@@ -1137,4 +1139,24 @@ public void testAddTransferRateMetricForInvalidValue() {
11371139
DFSUtil.addTransferRateMetric(mockMetrics, 100, 0);
11381140
verify(mockMetrics, times(0)).addReadTransferRate(anyLong());
11391141
}
1142+
1143+
/**
 * Checks DFSUtil.getHostSet for entries expected to be dropped, an entry
 * without a port, and a well-formed host:port entry.
 */
@Test
1144+
public void testGetHostSet() {
1145+
// Expected to yield no entries; presumably these constants name hosts that
// do not resolve -- TODO confirm against their definitions.
String[] testAddrs = new String[] {NS1_NN_ADDR, NS1_NN1_ADDR};
1146+
HostSet hostSet = DFSUtil.getHostSet(testAddrs);
1147+
assertNotNull(hostSet);
1148+
assertEquals(0, hostSet.size());
1149+
1150+
// An entry without a port is skipped.
String strAddress = "localhost";
1151+
testAddrs = new String[] {strAddress};
1152+
hostSet = DFSUtil.getHostSet(testAddrs);
1153+
assertEquals(0, hostSet.size());
1154+
1155+
// A host:port entry is added and matches the same address.
strAddress = "localhost:9000";
1156+
InetSocketAddress inetSocketAddress = new InetSocketAddress("localhost", 9000);
1157+
testAddrs = new String[] {strAddress};
1158+
hostSet = DFSUtil.getHostSet(testAddrs);
1159+
assertNotNull(hostSet);
1160+
assertTrue(hostSet.match(inetSocketAddress));
1161+
}
11401162
}

0 commit comments

Comments
 (0)