Skip to content

Commit c4a02f7

Browse files
authored
HBASE-28321 RpcConnectionRegistry is broken when security is enabled and we use different principal for master and region server (#5688)
Signed-off-by: Bryan Beaudreault <bbeaudreault@apache.org>
1 parent c3ee1dd commit c4a02f7

33 files changed

+1084
-186
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,7 @@ static void setCoprocessorError(RpcController controller, Throwable error) {
665665
}
666666
}
667667

668-
static boolean isUnexpectedPreambleHeaderException(IOException e) {
668+
public static boolean isUnexpectedPreambleHeaderException(IOException e) {
669669
if (!(e instanceof RemoteException)) {
670670
return false;
671671
}

hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import static org.apache.hadoop.hbase.ipc.IPCUtil.toIOE;
2121
import static org.apache.hadoop.hbase.ipc.IPCUtil.wrapException;
2222

23+
import com.google.errorprone.annotations.RestrictedApi;
2324
import io.opentelemetry.api.trace.Span;
2425
import io.opentelemetry.api.trace.StatusCode;
2526
import io.opentelemetry.context.Scope;
@@ -542,6 +543,12 @@ public RpcChannel createRpcChannel(ServerName sn, User user, int rpcTimeout) {
542543
return new RpcChannelImplementation(this, createAddr(sn), user, rpcTimeout);
543544
}
544545

546+
@RestrictedApi(explanation = "Should only be called in tests", link = "",
547+
allowedOnPath = ".*/src/test/.*")
548+
PoolMap<ConnectionId, T> getConnections() {
549+
return connections;
550+
}
551+
545552
private static class AbstractRpcChannel {
546553

547554
protected final Address addr;

hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/BlockingRpcConnection.java

Lines changed: 83 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,14 @@
3737
import java.util.ArrayDeque;
3838
import java.util.Locale;
3939
import java.util.Queue;
40+
import java.util.Set;
4041
import java.util.concurrent.ConcurrentHashMap;
4142
import java.util.concurrent.ConcurrentMap;
4243
import java.util.concurrent.ThreadLocalRandom;
4344
import javax.security.sasl.SaslException;
4445
import org.apache.hadoop.conf.Configuration;
4546
import org.apache.hadoop.hbase.DoNotRetryIOException;
47+
import org.apache.hadoop.hbase.client.ConnectionUtils;
4648
import org.apache.hadoop.hbase.exceptions.ConnectionClosingException;
4749
import org.apache.hadoop.hbase.io.ByteArrayOutputStream;
4850
import org.apache.hadoop.hbase.ipc.HBaseRpcController.CancellationCallback;
@@ -352,13 +354,13 @@ private void disposeSasl() {
352354
}
353355
}
354356

355-
private boolean setupSaslConnection(final InputStream in2, final OutputStream out2)
356-
throws IOException {
357+
private boolean setupSaslConnection(final InputStream in2, final OutputStream out2,
358+
String serverPrincipal) throws IOException {
357359
if (this.metrics != null) {
358360
this.metrics.incrNsLookups();
359361
}
360362
saslRpcClient = new HBaseSaslRpcClient(this.rpcClient.conf, provider, token,
361-
socket.getInetAddress(), securityInfo, this.rpcClient.fallbackAllowed,
363+
socket.getInetAddress(), serverPrincipal, this.rpcClient.fallbackAllowed,
362364
this.rpcClient.conf.get("hbase.rpc.protection",
363365
QualityOfProtection.AUTHENTICATION.name().toLowerCase(Locale.ROOT)),
364366
this.rpcClient.conf.getBoolean(CRYPTO_AES_ENABLED_KEY, CRYPTO_AES_ENABLED_DEFAULT));
@@ -379,7 +381,8 @@ private boolean setupSaslConnection(final InputStream in2, final OutputStream ou
379381
* </p>
380382
*/
381383
private void handleSaslConnectionFailure(final int currRetries, final int maxRetries,
382-
final Exception ex, final UserGroupInformation user) throws IOException, InterruptedException {
384+
final Exception ex, final UserGroupInformation user, final String serverPrincipal)
385+
throws IOException, InterruptedException {
383386
closeSocket();
384387
user.doAs(new PrivilegedExceptionAction<Object>() {
385388
@Override
@@ -419,25 +422,75 @@ public Object run() throws IOException, InterruptedException {
419422
Thread.sleep(ThreadLocalRandom.current().nextInt(reloginMaxBackoff) + 1);
420423
return null;
421424
} else {
422-
String msg =
423-
"Failed to initiate connection for " + UserGroupInformation.getLoginUser().getUserName()
424-
+ " to " + securityInfo.getServerPrincipal();
425+
String msg = "Failed to initiate connection for "
426+
+ UserGroupInformation.getLoginUser().getUserName() + " to " + serverPrincipal;
425427
throw new IOException(msg, ex);
426428
}
427429
}
428430
});
429431
}
430432

431-
private void getConnectionRegistry(OutputStream outStream) throws IOException {
433+
private void getConnectionRegistry(InputStream inStream, OutputStream outStream,
434+
Call connectionRegistryCall) throws IOException {
432435
outStream.write(RpcClient.REGISTRY_PREAMBLE_HEADER);
436+
readResponse(new DataInputStream(inStream), calls, connectionRegistryCall, remoteExc -> {
437+
synchronized (this) {
438+
closeConn(remoteExc);
439+
}
440+
});
433441
}
434442

435443
private void createStreams(InputStream inStream, OutputStream outStream) {
436444
this.in = new DataInputStream(new BufferedInputStream(inStream));
437445
this.out = new DataOutputStream(new BufferedOutputStream(outStream));
438446
}
439447

440-
private void setupIOstreams() throws IOException {
448+
// choose the server principal to use
449+
private String chooseServerPrincipal(InputStream inStream, OutputStream outStream)
450+
throws IOException {
451+
Set<String> serverPrincipals = getServerPrincipals();
452+
if (serverPrincipals.size() == 1) {
453+
return serverPrincipals.iterator().next();
454+
}
455+
// this means we use kerberos authentication and there are multiple server principal candidates,
456+
// in this way we need to send a special preamble header to get server principal from server
457+
Call securityPreambleCall = createSecurityPreambleCall(r -> {
458+
});
459+
outStream.write(RpcClient.SECURITY_PREAMBLE_HEADER);
460+
readResponse(new DataInputStream(inStream), calls, securityPreambleCall, remoteExc -> {
461+
synchronized (this) {
462+
closeConn(remoteExc);
463+
}
464+
});
465+
if (securityPreambleCall.error != null) {
466+
LOG.debug("Error when trying to do a security preamble call to {}", remoteId.address,
467+
securityPreambleCall.error);
468+
if (ConnectionUtils.isUnexpectedPreambleHeaderException(securityPreambleCall.error)) {
469+
// this means we are connecting to an old server which does not support the security
470+
// preamble call, so we should fallback to randomly select a principal to use
471+
// TODO: find a way to reconnect without failing all the pending calls, for now, when we
472+
// reach here, shutdown should have already been scheduled
473+
throw securityPreambleCall.error;
474+
}
475+
if (IPCUtil.isSecurityNotEnabledException(securityPreambleCall.error)) {
476+
// server tells us security is not enabled, then we should check whether fallback to
477+
// simple is allowed, if so we just go without security, otherwise we should fail the
478+
// negotiation immediately
479+
if (rpcClient.fallbackAllowed) {
480+
// TODO: just change the preamble and skip the fallback to simple logic, for now, just
481+
// select the first principal can finish the connection setup, but waste one client
482+
// message
483+
return serverPrincipals.iterator().next();
484+
} else {
485+
throw new FallbackDisallowedException();
486+
}
487+
}
488+
return randomSelect(serverPrincipals);
489+
}
490+
return chooseServerPrincipal(serverPrincipals, securityPreambleCall);
491+
}
492+
493+
private void setupIOstreams(Call connectionRegistryCall) throws IOException {
441494
if (socket != null) {
442495
// The connection is already available. Perfect.
443496
return;
@@ -465,32 +518,37 @@ private void setupIOstreams() throws IOException {
465518
// This creates a socket with a write timeout. This timeout cannot be changed.
466519
OutputStream outStream = NetUtils.getOutputStream(socket, this.rpcClient.writeTO);
467520
if (connectionRegistryCall != null) {
468-
getConnectionRegistry(outStream);
469-
createStreams(inStream, outStream);
470-
break;
521+
getConnectionRegistry(inStream, outStream, connectionRegistryCall);
522+
closeSocket();
523+
return;
471524
}
472-
// Write out the preamble -- MAGIC, version, and auth to use.
473-
writeConnectionHeaderPreamble(outStream);
525+
474526
if (useSasl) {
475-
final InputStream in2 = inStream;
476-
final OutputStream out2 = outStream;
477527
UserGroupInformation ticket = provider.getRealUser(remoteId.ticket);
478528
boolean continueSasl;
479529
if (ticket == null) {
480530
throw new FatalConnectionException("ticket/user is null");
481531
}
532+
String serverPrincipal = chooseServerPrincipal(inStream, outStream);
533+
// Write out the preamble -- MAGIC, version, and auth to use.
534+
writeConnectionHeaderPreamble(outStream);
482535
try {
536+
final InputStream in2 = inStream;
537+
final OutputStream out2 = outStream;
483538
continueSasl = ticket.doAs(new PrivilegedExceptionAction<Boolean>() {
484539
@Override
485540
public Boolean run() throws IOException {
486-
return setupSaslConnection(in2, out2);
541+
return setupSaslConnection(in2, out2, serverPrincipal);
487542
}
488543
});
489544
} catch (Exception ex) {
490545
ExceptionUtil.rethrowIfInterrupt(ex);
491-
handleSaslConnectionFailure(numRetries++, reloginMaxRetries, ex, ticket);
546+
saslNegotiationDone(serverPrincipal, false);
547+
handleSaslConnectionFailure(numRetries++, reloginMaxRetries, ex, ticket,
548+
serverPrincipal);
492549
continue;
493550
}
551+
saslNegotiationDone(serverPrincipal, true);
494552
if (continueSasl) {
495553
// Sasl connect is successful. Let's set up Sasl i/o streams.
496554
inStream = saslRpcClient.getInputStream();
@@ -501,6 +559,9 @@ public Boolean run() throws IOException {
501559
// reconnecting because regionserver may change its sasl config after restart.
502560
saslRpcClient = null;
503561
}
562+
} else {
563+
// Write out the preamble -- MAGIC, version, and auth to use.
564+
writeConnectionHeaderPreamble(outStream);
504565
}
505566
createStreams(inStream, outStream);
506567
// Now write out the connection header
@@ -618,9 +679,10 @@ private void writeRequest(Call call) throws IOException {
618679
}
619680
RequestHeader requestHeader = buildRequestHeader(call, cellBlockMeta);
620681
if (call.isConnectionRegistryCall()) {
621-
connectionRegistryCall = call;
682+
setupIOstreams(call);
683+
return;
622684
}
623-
setupIOstreams();
685+
setupIOstreams(null);
624686

625687
// Now we're going to write the call. We take the lock, then check that the connection
626688
// is still valid, and, if so we do the write to the socket. If the write fails, we don't
@@ -655,7 +717,7 @@ private void writeRequest(Call call) throws IOException {
655717
*/
656718
private void readResponse() {
657719
try {
658-
readResponse(in, calls, remoteExc -> {
720+
readResponse(in, calls, null, remoteExc -> {
659721
synchronized (this) {
660722
closeConn(remoteExc);
661723
}

hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/Call.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import io.opentelemetry.api.trace.Span;
2121
import java.io.IOException;
2222
import java.util.Map;
23-
import java.util.Optional;
2423
import org.apache.commons.lang3.builder.ToStringBuilder;
2524
import org.apache.commons.lang3.builder.ToStringStyle;
2625
import org.apache.hadoop.hbase.CellScanner;
@@ -85,16 +84,15 @@ class Call {
8584
* Builds a simplified {@link #toString()} that includes just the id and method name.
8685
*/
8786
public String toShortString() {
87+
// Call[id=32153218,methodName=Get]
8888
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).append("id", id)
89-
.append("methodName", md.getName()).toString();
89+
.append("methodName", md != null ? md.getName() : "").toString();
9090
}
9191

9292
@Override
9393
public String toString() {
94-
// Call[id=32153218,methodName=Get]
9594
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).appendSuper(toShortString())
96-
.append("param", Optional.ofNullable(param).map(ProtobufUtil::getShortTextFormat).orElse(""))
97-
.toString();
95+
.append("param", param != null ? ProtobufUtil.getShortTextFormat(param) : "").toString();
9896
}
9997

10098
/**

hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/IPCUtil.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,11 @@ static boolean isFatalConnectionException(ExceptionResponse e) {
178178
}
179179
}
180180

181+
static boolean isSecurityNotEnabledException(IOException e) {
182+
return e instanceof RemoteException
183+
&& SecurityNotEnabledException.class.getName().equals(((RemoteException) e).getClassName());
184+
}
185+
181186
static IOException toIOE(Throwable t) {
182187
if (t instanceof IOException) {
183188
return (IOException) t;

0 commit comments

Comments
 (0)