Skip to content

Commit bb12e81

Browse files
author
Hanisha Koneru
authored
HDDS-1175. Serve read requests directly from RocksDB. (#557)
HDDS-1175. Serve read requests directly from RocksDB.
1 parent a55fc36 commit bb12e81

File tree

12 files changed

+374
-45
lines changed

12 files changed

+374
-45
lines changed

hadoop-hdds/common/src/main/resources/ozone-default.xml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1646,6 +1646,16 @@
16461646
</description>
16471647
</property>
16481648

1649+
<property>
1650+
<name>ozone.om.ratis.server.role.check.interval</name>
1651+
<value>15s</value>
1652+
<tag>OZONE, OM, RATIS, MANAGEMENT</tag>
1653+
<description>The interval between OM leader performing a role
1654+
check on its ratis server. Ratis server informs OM if it
1655+
loses the leader role. The scheduled check is an secondary
1656+
check to ensure that the leader role is updated periodically
1657+
.</description>
1658+
</property>
16491659

16501660
<property>
16511661
<name>ozone.acl.authorizer.class</name>

hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,13 @@ private OMConfigKeys() {
183183
OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_DEFAULT
184184
= TimeDuration.valueOf(120, TimeUnit.SECONDS);
185185

186+
// OM Leader server role check interval
187+
public static final String OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_KEY
188+
= "ozone.om.ratis.server.role.check.interval";
189+
public static final TimeDuration
190+
OZONE_OM_RATIS_SERVER_ROLE_CHECK_INTERVAL_DEFAULT
191+
= TimeDuration.valueOf(15, TimeUnit.SECONDS);
192+
186193
public static final String OZONE_OM_KERBEROS_KEYTAB_FILE_KEY = "ozone.om."
187194
+ "kerberos.keytab.file";
188195
public static final String OZONE_OM_KERBEROS_PRINCIPAL_KEY = "ozone.om"
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
* <p>
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
* <p>
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.ozone.om.exceptions;
20+
21+
import java.io.IOException;
22+
23+
/**
24+
* Exception thrown by
25+
* {@link org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolPB} when
26+
* a read request is received by a non leader OM node.
27+
*/
28+
public class NotLeaderException extends IOException {
29+
30+
private final String currentPeerId;
31+
private final String leaderPeerId;
32+
33+
public NotLeaderException(String currentPeerIdStr) {
34+
super("OM " + currentPeerIdStr + " is not the leader. Could not " +
35+
"determine the leader node.");
36+
this.currentPeerId = currentPeerIdStr;
37+
this.leaderPeerId = null;
38+
}
39+
40+
public NotLeaderException(String currentPeerIdStr,
41+
String suggestedLeaderPeerIdStr) {
42+
super("OM " + currentPeerIdStr + " is not the leader. Suggested leader is "
43+
+ suggestedLeaderPeerIdStr);
44+
this.currentPeerId = currentPeerIdStr;
45+
this.leaderPeerId = suggestedLeaderPeerIdStr;
46+
}
47+
48+
public String getSuggestedLeaderNodeId() {
49+
return leaderPeerId;
50+
}
51+
}

hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,14 @@ public Class<OzoneManagerProtocolPB> getInterface() {
226226
* not match the current leaderOMNodeId cached by the proxy provider.
227227
*/
228228
public void performFailoverIfRequired(String newLeaderOMNodeId) {
229-
if (updateLeaderOMNodeId(newLeaderOMNodeId)) {
230-
LOG.debug("Failing over OM proxy to nodeId: {}", newLeaderOMNodeId);
229+
if (newLeaderOMNodeId == null) {
230+
LOG.debug("No suggested leader nodeId. Performing failover to next peer" +
231+
" node");
232+
performFailover(null);
233+
} else {
234+
if (updateLeaderOMNodeId(newLeaderOMNodeId)) {
235+
LOG.debug("Failing over OM proxy to nodeId: {}", newLeaderOMNodeId);
236+
}
231237
}
232238
}
233239

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* the License.
1616
*/
1717

18-
package org.apache.hadoop.ozone.om.ratis;
18+
package org.apache.hadoop.ozone.om.helpers;
1919

2020
import com.google.protobuf.InvalidProtocolBufferException;
2121
import org.apache.hadoop.conf.Configuration;
@@ -25,8 +25,6 @@
2525
.OMRequest;
2626
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
2727
.OMResponse;
28-
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Status;
29-
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.Type;
3028
import org.apache.ratis.RaftConfigKeys;
3129
import org.apache.ratis.client.RaftClient;
3230
import org.apache.ratis.conf.RaftProperties;
@@ -54,14 +52,15 @@ private OMRatisHelper() {
5452

5553
/**
5654
* Creates a new RaftClient object.
57-
* @param rpcType Replication Type
58-
* @param omId OM id of the client
59-
* @param group RaftGroup
55+
*
56+
* @param rpcType Replication Type
57+
* @param omId OM id of the client
58+
* @param group RaftGroup
6059
* @param retryPolicy Retry policy
6160
* @return RaftClient object
6261
*/
63-
static RaftClient newRaftClient(RpcType rpcType, String omId, RaftGroup
64-
group, RetryPolicy retryPolicy, Configuration conf) {
62+
public static RaftClient newRaftClient(RpcType rpcType, String omId, RaftGroup
63+
group, RetryPolicy retryPolicy, Configuration conf) {
6564
LOG.trace("newRaftClient: {}, leader={}, group={}", rpcType, omId, group);
6665
final RaftProperties properties = new RaftProperties();
6766
RaftConfigKeys.Rpc.setType(properties, rpcType);
@@ -85,36 +84,27 @@ static RaftPeerId getRaftPeerId(String omId) {
8584
return RaftPeerId.valueOf(omId);
8685
}
8786

88-
static ByteString convertRequestToByteString(OMRequest request) {
87+
public static ByteString convertRequestToByteString(OMRequest request) {
8988
byte[] requestBytes = request.toByteArray();
9089
return ByteString.copyFrom(requestBytes);
9190
}
9291

93-
static OMRequest convertByteStringToOMRequest(ByteString byteString)
92+
public static OMRequest convertByteStringToOMRequest(ByteString byteString)
9493
throws InvalidProtocolBufferException {
9594
byte[] bytes = byteString.toByteArray();
9695
return OMRequest.parseFrom(bytes);
9796
}
9897

99-
static Message convertResponseToMessage(OMResponse response) {
98+
public static Message convertResponseToMessage(OMResponse response) {
10099
byte[] requestBytes = response.toByteArray();
101100
return Message.valueOf(ByteString.copyFrom(requestBytes));
102101
}
103102

104-
static OMResponse getOMResponseFromRaftClientReply(RaftClientReply reply)
105-
throws InvalidProtocolBufferException {
103+
public static OMResponse getOMResponseFromRaftClientReply(
104+
RaftClientReply reply) throws InvalidProtocolBufferException {
106105
byte[] bytes = reply.getMessage().getContent().toByteArray();
107106
return OMResponse.newBuilder(OMResponse.parseFrom(bytes))
108107
.setLeaderOMNodeId(reply.getReplierId())
109108
.build();
110109
}
111-
112-
static OMResponse getErrorResponse(Type cmdType, Exception e) {
113-
return OMResponse.newBuilder()
114-
.setCmdType(cmdType)
115-
.setSuccess(false)
116-
.setMessage(e.getMessage())
117-
.setStatus(Status.INTERNAL_ERROR)
118-
.build();
119-
}
120-
}
110+
}

hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.apache.hadoop.ipc.ProtobufHelper;
3636
import org.apache.hadoop.ipc.ProtocolTranslator;
3737
import org.apache.hadoop.ozone.OzoneConfigKeys;
38+
import org.apache.hadoop.ozone.om.exceptions.NotLeaderException;
3839
import org.apache.hadoop.ozone.om.exceptions.OMException;
3940
import org.apache.hadoop.ozone.om.ha.OMFailoverProxyProvider;
4041
import org.apache.hadoop.ozone.om.helpers.KeyValueUtil;
@@ -195,29 +196,49 @@ public OzoneManagerProtocolClientSideTranslatorPB(OzoneConfiguration conf,
195196
private OzoneManagerProtocolPB createRetryProxy(
196197
OMFailoverProxyProvider failoverProxyProvider,
197198
int maxRetries, int maxFailovers, int delayMillis, int maxDelayBase) {
199+
198200
RetryPolicy retryPolicyOnNetworkException = RetryPolicies
199201
.failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL,
200202
maxFailovers, maxRetries, delayMillis, maxDelayBase);
203+
201204
RetryPolicy retryPolicy = new RetryPolicy() {
202205
@Override
203206
public RetryAction shouldRetry(Exception exception, int retries,
204207
int failovers, boolean isIdempotentOrAtMostOnce)
205208
throws Exception {
206-
if (exception instanceof EOFException ||
207-
exception instanceof ServiceException) {
208-
if (retries < maxRetries && failovers < maxFailovers) {
209-
return RetryAction.FAILOVER_AND_RETRY;
209+
210+
if (exception instanceof ServiceException) {
211+
Throwable cause = exception.getCause();
212+
if (cause instanceof NotLeaderException) {
213+
NotLeaderException notLeaderException = (NotLeaderException) cause;
214+
omFailoverProxyProvider.performFailoverIfRequired(
215+
notLeaderException.getSuggestedLeaderNodeId());
216+
return getRetryAction(RetryAction.RETRY, retries, failovers);
210217
} else {
211-
FAILOVER_PROXY_PROVIDER_LOG.error("Failed to connect to OM. " +
212-
"Attempted {} retries and {} failovers", retries, failovers);
213-
return RetryAction.FAIL;
218+
return getRetryAction(RetryAction.FAILOVER_AND_RETRY, retries,
219+
failovers);
214220
}
221+
} else if (exception instanceof EOFException) {
222+
return getRetryAction(RetryAction.FAILOVER_AND_RETRY, retries,
223+
failovers);
215224
} else {
216225
return retryPolicyOnNetworkException.shouldRetry(
217-
exception, retries, failovers, isIdempotentOrAtMostOnce);
226+
exception, retries, failovers, isIdempotentOrAtMostOnce);
227+
}
228+
}
229+
230+
private RetryAction getRetryAction(RetryAction fallbackAction,
231+
int retries, int failovers) {
232+
if (retries < maxRetries && failovers < maxFailovers) {
233+
return fallbackAction;
234+
} else {
235+
FAILOVER_PROXY_PROVIDER_LOG.error("Failed to connect to OM. " +
236+
"Attempted {} retries and {} failovers", retries, failovers);
237+
return RetryAction.FAIL;
218238
}
219239
}
220240
};
241+
221242
OzoneManagerProtocolPB proxy = (OzoneManagerProtocolPB) RetryProxy.create(
222243
OzoneManagerProtocolPB.class, failoverProxyProvider, retryPolicy);
223244
return proxy;

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,6 @@
5252
.OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY;
5353
import static org.apache.hadoop.ozone.OzoneConfigKeys
5454
.OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_DEFAULT;
55-
import static org.apache.hadoop.ozone.OzoneConfigKeys
56-
.OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_KEY;
5755
import static org.apache.hadoop.ozone.OzoneConfigKeys
5856
.OZONE_CLIENT_RETRY_MAX_ATTEMPTS_KEY;
5957
import static org.apache.hadoop.ozone.OzoneConfigKeys
@@ -75,7 +73,7 @@ public class TestOzoneManagerHA {
7573
public ExpectedException exception = ExpectedException.none();
7674

7775
@Rule
78-
public Timeout timeout = new Timeout(120_000);
76+
public Timeout timeout = new Timeout(300_000);
7977

8078
/**
8179
* Create a MiniDFSCluster for testing.
@@ -93,7 +91,6 @@ public void init() throws Exception {
9391
conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
9492
conf.setInt(OZONE_CLIENT_RETRY_MAX_ATTEMPTS_KEY, 3);
9593
conf.setInt(OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, 3);
96-
conf.setInt(OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_KEY, 50);
9794

9895
cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf)
9996
.setClusterId(clusterId)
@@ -313,4 +310,41 @@ public void testOMRetryProxy() throws Exception {
313310
"3 retries and 3 failovers"));
314311
}
315312
}
313+
314+
@Test
315+
public void testReadRequest() throws Exception {
316+
String volumeName = "volume" + RandomStringUtils.randomNumeric(5);
317+
objectStore.createVolume(volumeName);
318+
319+
OMFailoverProxyProvider omFailoverProxyProvider =
320+
objectStore.getClientProxy().getOMProxyProvider();
321+
String currentLeaderNodeId = omFailoverProxyProvider
322+
.getCurrentProxyOMNodeId();
323+
324+
// A read request from any proxy should failover to the current leader OM
325+
for (int i = 0; i < numOfOMs; i++) {
326+
// Failover OMFailoverProxyProvider to OM at index i
327+
OzoneManager ozoneManager = cluster.getOzoneManager(i);
328+
String omHostName = ozoneManager.getOmRpcServerAddr().getHostName();
329+
int rpcPort = ozoneManager.getOmRpcServerAddr().getPort();
330+
331+
// Get the ObjectStore and FailoverProxyProvider for OM at index i
332+
final ObjectStore store = OzoneClientFactory.getRpcClient(
333+
omHostName, rpcPort, conf).getObjectStore();
334+
final OMFailoverProxyProvider proxyProvider =
335+
store.getClientProxy().getOMProxyProvider();
336+
337+
// Failover to the OM node that the objectStore points to
338+
omFailoverProxyProvider.performFailoverIfRequired(
339+
ozoneManager.getOMNodId());
340+
341+
// A read request should result in the proxyProvider failing over to
342+
// leader node.
343+
OzoneVolume volume = store.getVolume(volumeName);
344+
Assert.assertEquals(volumeName, volume.getName());
345+
346+
Assert.assertEquals(currentLeaderNodeId,
347+
proxyProvider.getCurrentProxyOMNodeId());
348+
}
349+
}
316350
}

hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,8 +1236,8 @@ private RPC.Server getRpcServer(OzoneConfiguration conf) throws IOException {
12361236
ProtobufRpcEngine.class);
12371237

12381238
BlockingService omService = newReflectiveBlockingService(
1239-
new OzoneManagerProtocolServerSideTranslatorPB(this, omRatisClient,
1240-
isRatisEnabled));
1239+
new OzoneManagerProtocolServerSideTranslatorPB(this, omRatisServer,
1240+
omRatisClient, isRatisEnabled));
12411241
return startRpcServer(configuration, omNodeRpcAddr,
12421242
OzoneManagerProtocolPB.class, omService,
12431243
handlerCount);

hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisClient.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.apache.hadoop.conf.Configuration;
3131
import org.apache.hadoop.ozone.OmUtils;
3232
import org.apache.hadoop.ozone.om.OMConfigKeys;
33+
import org.apache.hadoop.ozone.om.helpers.OMRatisHelper;
3334
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
3435
.OMRequest;
3536
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos

0 commit comments

Comments
 (0)