Skip to content

YARN-2774. support secure clusters in shared cache manager #881

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ It's recommended to have them share a Unix group, e.g. `hadoop`. See also "[Mapp
| User:Group | Daemons |
|:--------------|:----------------------------------------------------|
| hdfs:hadoop | NameNode, Secondary NameNode, JournalNode, DataNode |
| yarn:hadoop | ResourceManager, NodeManager |
| yarn:hadoop | ResourceManager, NodeManager, SharedCacheManager |
| mapred:hadoop | MapReduce JobHistory Server |

### Kerberos principals for Hadoop Daemons
Expand Down Expand Up @@ -117,6 +117,18 @@ The NodeManager keytab file, on each host, should look like the following:
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)

The SharedCacheManager keytab file, on that host, should look like the following:

$ klist -e -k -t /etc/security/keytab/scm.service.keytab
Keytab name: FILE:/etc/security/keytab/scm.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 scm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 scm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 scm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)

#### MapReduce JobHistory Server

The MapReduce JobHistory Server keytab file, on that host, should look like the following:
Expand Down Expand Up @@ -333,6 +345,13 @@ The following settings allow configuring SSL access to the NameNode web UI (opti
| `yarn.nodemanager.linux-container-executor.path` | `/path/to/bin/container-executor` | The path to the executable of Linux container executor. |
| `yarn.nodemanager.webapp.https.address` | `0.0.0.0:8044` | The https address of the NM web application. |

### SharedCacheManager

| Parameter | Value | Notes |
|:-----------------------------|:------------------------------------------|:----------------------------------------------------|
| `yarn.sharedcache.principal` | `scm/_HOST@REALM.TLD` | Kerberos principal name for the SharedCacheManager. |
| `yarn.sharedcache.keytab` | `/etc/security/keytab/scm.service.keytab` | Kerberos keytab file for the SharedCacheManager. |

### Configuration for WebAppProxy

The `WebAppProxy` provides a proxy between the web applications exported by an application and an end user. If security is enabled it will warn users before accessing a potentially unsafe web application. Authentication and authorization using the proxy is handled just like any other privileged web application.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,16 @@
package org.apache.hadoop.yarn.api;

import org.apache.hadoop.ipc.ProtocolInfo;
import org.apache.hadoop.security.KerberosInfo;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.proto.ClientSCMProtocol.ClientSCMProtocolService;

/**
* This is the protocol interface used by the shared cache client to interact
* with the shared cache manager.
*/
@KerberosInfo(
serverPrincipal = YarnConfiguration.SCM_PRINCIPAL)
@ProtocolInfo(protocolName = "org.apache.hadoop.yarn.api.ClientSCMProtocolPB",
protocolVersion = 1)
public interface ClientSCMProtocolPB extends
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2425,6 +2425,18 @@ public static boolean isAclEnabled(Configuration conf) {
YARN_SECURITY_SERVICE_AUTHORIZATION_APPLICATIONMASTER_NODEMANAGER_PROTOCOL =
"security.applicationmaster-nodemanager.applicationmaster.protocol.acl";

/**
 * Configuration key of the service authorization ACL for the protocol that
 * clients use to talk to the shared cache manager.
 */
public static final String
YARN_SECURITY_SERVICE_AUTHORIZATION_SHAREDCACHEMANAGER_CLIENT_PROTOCOL =
"security.sharedcachemanager.client.protocol.acl";

/**
 * Configuration key of the service authorization ACL for the shared cache
 * manager admin protocol.
 */
public static final String
YARN_SECURITY_SERVICE_AUTHORIZATION_SHAREDCACHEMANAGER_ADMIN_PROTOCOL =
"security.sharedcachemanager.admin.protocol.acl";

/**
 * Configuration key of the service authorization ACL for the uploader
 * protocol, which node managers use to talk to the shared cache manager.
 */
public static final String
YARN_SECURITY_SERVICE_AUTHORIZATION_SHAREDCACHEMANAGER_UPLOADER_PROTOCOL =
"security.sharedcachemanager.uploader.protocol.acl";

/** No. of milliseconds to wait between sending a SIGTERM and SIGKILL
* to a running container */
public static final String NM_SLEEP_DELAY_BEFORE_SIGKILL_MS =
Expand Down Expand Up @@ -3342,6 +3354,13 @@ public static boolean isAclEnabled(Configuration conf) {
SHARED_CACHE_PREFIX + "nm.uploader.thread-count";
public static final int DEFAULT_SHARED_CACHE_NM_UPLOADER_THREAD_COUNT = 20;

/** The Kerberos keytab file for the shared cache manager. */
public static final String SCM_KEYTAB =
SHARED_CACHE_PREFIX + "keytab";

/** The Kerberos principal for the shared cache manager. */
public static final String SCM_PRINCIPAL =
SHARED_CACHE_PREFIX + "principal";

////////////////////////////////
// Federation Configs
////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.ipc.ProtocolInfo;
import org.apache.hadoop.security.KerberosInfo;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.proto.SCMAdminProtocol.SCMAdminProtocolService;

@Private
@Unstable
@KerberosInfo(
serverPrincipal = YarnConfiguration.SCM_PRINCIPAL)
@ProtocolInfo(protocolName = "org.apache.hadoop.yarn.server.api.SCMAdminProtocolPB",
protocolVersion = 1)
public interface SCMAdminProtocolPB extends
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2836,6 +2836,45 @@
<value>20</value>
</property>

<property>
<description>The Kerberos principal for the shared cache manager.
</description>
<name>yarn.sharedcache.principal</name>
<value></value>
</property>

<property>
<description>The Kerberos keytab for the shared cache manager.
</description>
<name>yarn.sharedcache.keytab</name>
<value></value>
</property>

<property>
<description>
ACL protocol used in shared cache manager to control client requests.
</description>
<name>security.sharedcachemanager.client.protocol.acl</name>
<value></value>
</property>

<property>
<description>
ACL protocol used in shared cache manager for admin RPC requests.
</description>
<name>security.sharedcachemanager.admin.protocol.acl</name>
<value></value>
</property>

<property>
<description>
ACL protocol used in shared cache manager for uploader requests from
the node manager.
</description>
<name>security.sharedcachemanager.uploader.protocol.acl</name>
<value></value>
</property>

<property>
<description>
ACL protocol for use in the Timeline server.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,16 @@
package org.apache.hadoop.yarn.server.api;

import org.apache.hadoop.ipc.ProtocolInfo;
import org.apache.hadoop.security.KerberosInfo;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.proto.SCMUploaderProtocol.SCMUploaderProtocolService;

/**
* Protocol interface that provides the uploader interface. The client should
* be the node manager and the server is the shared cache manager.
*/
@KerberosInfo(
serverPrincipal = YarnConfiguration.SCM_PRINCIPAL)
@ProtocolInfo(protocolName = "org.apache.hadoop.yarn.server.api.SCMUploaderProtocolPB",
protocolVersion = 1)
public interface SCMUploaderProtocolPB extends
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
import java.io.IOException;
import java.net.InetSocketAddress;

import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.PolicyProvider;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.ClientSCMProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.ReleaseSharedCacheResourceRequest;
Expand All @@ -41,6 +44,7 @@
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.server.sharedcache.SharedCacheUtil;
import org.apache.hadoop.yarn.server.sharedcachemanager.metrics.ClientSCMMetrics;
import org.apache.hadoop.yarn.server.sharedcachemanager.security.SCMPolicyProvider;
import org.apache.hadoop.yarn.server.sharedcachemanager.store.SCMStore;
import org.apache.hadoop.yarn.server.sharedcachemanager.store.SharedCacheResourceReference;
import org.slf4j.Logger;
Expand Down Expand Up @@ -105,7 +109,13 @@ protected void serviceStart() throws Exception {
conf.getInt(YarnConfiguration.SCM_CLIENT_SERVER_THREAD_COUNT,
YarnConfiguration.DEFAULT_SCM_CLIENT_SERVER_THREAD_COUNT));

// TODO (YARN-2774): Enable service authorization
// TODO: dynamically load ACLs
// Enable service authorization
if (conf.getBoolean(
CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
false)) {
refreshServiceAcls(conf, SCMPolicyProvider.getInstance());
}

this.server.start();
clientBindAddress =
Expand All @@ -115,6 +125,12 @@ protected void serviceStart() throws Exception {
super.serviceStart();
}

/**
 * Hands the given configuration and policy provider to the RPC server so
 * that it loads its service-level authorization ACLs from them.
 *
 * @param configuration the already loaded configuration to read ACLs from
 * @param policyProvider the policy provider passed on to the server
 */
private void refreshServiceAcls(Configuration configuration,
    PolicyProvider policyProvider) {
  final Server rpcServer = this.server;
  rpcServer.refreshServiceAclWithLoadedConfiguration(
      configuration, policyProvider);
}

@Override
protected void serviceStop() throws Exception {
if (this.server != null) {
Expand Down Expand Up @@ -190,4 +206,9 @@ private String getCacheEntryFilePath(String checksum, String filename) {
return SharedCacheUtil.getCacheEntryPath(this.cacheDepth,
this.cacheRoot, checksum) + Path.SEPARATOR_CHAR + filename;
}

/**
 * Test hook giving access to the RPC server held by this service.
 *
 * @return the current value of the {@code server} field
 */
@VisibleForTesting
protected Server getServer() {
  return this.server;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
import java.io.IOException;
import java.net.InetSocketAddress;

import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.PolicyProvider;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.security.YarnAuthorizationProvider;
import org.apache.hadoop.yarn.server.api.SCMAdminProtocol;
Expand All @@ -38,6 +41,7 @@
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.ipc.RPCUtil;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.server.sharedcachemanager.security.SCMPolicyProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -89,7 +93,14 @@ protected void serviceStart() throws Exception {
conf.getInt(YarnConfiguration.SCM_ADMIN_CLIENT_THREAD_COUNT,
YarnConfiguration.DEFAULT_SCM_ADMIN_CLIENT_THREAD_COUNT));

// TODO: Enable service authorization (see YARN-2774)
// TODO: dynamically load ACLs
// Enable service authorization
if (conf.getBoolean(
CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
false)) {
refreshServiceAcls(
conf, SCMPolicyProvider.getInstance());
}

this.server.start();
clientBindAddress =
Expand All @@ -99,6 +110,12 @@ protected void serviceStart() throws Exception {
super.serviceStart();
}

/**
 * Hands the given configuration and policy provider to the admin RPC server
 * so that it loads its service-level authorization ACLs from them.
 *
 * @param configuration the already loaded configuration to read ACLs from
 * @param policyProvider the policy provider passed on to the server
 */
private void refreshServiceAcls(Configuration configuration,
    PolicyProvider policyProvider) {
  final Server rpcServer = this.server;
  rpcServer.refreshServiceAclWithLoadedConfiguration(
      configuration, policyProvider);
}

@Override
protected void serviceStop() throws Exception {
if (this.server != null) {
Expand Down Expand Up @@ -141,4 +158,9 @@ public RunSharedCacheCleanerTaskResponse runCleanerTask(
response.setAccepted(true);
return response;
}

/**
 * Test hook giving access to the admin RPC server held by this service.
 *
 * @return the current value of the {@code server} field
 */
@VisibleForTesting
public Server getServer() {
  return this.server;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,15 @@

package org.apache.hadoop.yarn.server.sharedcachemanager;

import java.io.IOException;
import java.net.InetSocketAddress;

import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.source.JvmMetrics;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.ShutdownHookManager;
Expand Down Expand Up @@ -63,6 +67,13 @@ public SharedCacheManager() {
@Override
protected void serviceInit(Configuration conf) throws Exception {

try {
doSecureLogin(conf);
} catch(IOException ie) {
throw new YarnRuntimeException(
"Shared cache manager failed to login", ie);
}

this.store = createSCMStoreService(conf);
addService(store);

Expand Down Expand Up @@ -130,6 +141,15 @@ private SCMWebServer createSCMWebServer(SharedCacheManager scm) {
return new SCMWebServer(scm);
}

/**
 * Logs the shared cache manager in via {@link SecurityUtil#login}, using the
 * keytab and principal configured under {@link YarnConfiguration#SCM_KEYTAB}
 * and {@link YarnConfiguration#SCM_PRINCIPAL}.
 *
 * The host name handed to the login call is taken from the SCM admin
 * address (presumably it replaces a {@code _HOST} placeholder in the
 * configured principal -- confirm against SecurityUtil).
 *
 * @param conf configuration holding the address, keytab and principal
 * @throws IOException if the login attempt fails
 */
protected void doSecureLogin(Configuration conf) throws IOException {
  final InetSocketAddress adminAddress = conf.getSocketAddr(
      YarnConfiguration.SCM_ADMIN_ADDRESS,
      YarnConfiguration.DEFAULT_SCM_ADMIN_ADDRESS,
      YarnConfiguration.DEFAULT_SCM_ADMIN_PORT);
  final String loginHost = adminAddress.getHostName();
  SecurityUtil.login(conf, YarnConfiguration.SCM_KEYTAB,
      YarnConfiguration.SCM_PRINCIPAL, loginHost);
}

@Override
protected void serviceStop() throws Exception {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@
import java.io.IOException;
import java.net.InetSocketAddress;

import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.security.authorize.PolicyProvider;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
Expand All @@ -35,6 +38,7 @@
import org.apache.hadoop.yarn.server.api.protocolrecords.SCMUploaderNotifyRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.SCMUploaderNotifyResponse;
import org.apache.hadoop.yarn.server.sharedcachemanager.metrics.SharedCacheUploaderMetrics;
import org.apache.hadoop.yarn.server.sharedcachemanager.security.SCMPolicyProvider;
import org.apache.hadoop.yarn.server.sharedcachemanager.store.SCMStore;

/**
Expand Down Expand Up @@ -81,7 +85,14 @@ protected void serviceStart() throws Exception {
conf.getInt(YarnConfiguration.SCM_UPLOADER_SERVER_THREAD_COUNT,
YarnConfiguration.DEFAULT_SCM_UPLOADER_SERVER_THREAD_COUNT));

// TODO (YARN-2774): Enable service authorization
// TODO: dynamically load ACLs
// Enable service authorization
if (conf.getBoolean(
CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
false)) {
refreshServiceAcls(
conf, SCMPolicyProvider.getInstance());
}

this.server.start();
bindAddress =
Expand All @@ -91,6 +102,12 @@ protected void serviceStart() throws Exception {
super.serviceStart();
}

/**
 * Hands the given configuration and policy provider to the uploader RPC
 * server so that it loads its service-level authorization ACLs from them.
 *
 * @param configuration the already loaded configuration to read ACLs from
 * @param policyProvider the policy provider passed on to the server
 */
private void refreshServiceAcls(Configuration configuration,
    PolicyProvider policyProvider) {
  final Server rpcServer = this.server;
  rpcServer.refreshServiceAclWithLoadedConfiguration(
      configuration, policyProvider);
}

@Override
protected void serviceStop() throws Exception {
if (this.server != null) {
Expand Down Expand Up @@ -137,4 +154,9 @@ public SCMUploaderCanUploadResponse canUpload(
response.setUploadable(true);
return response;
}

/**
 * Test hook giving access to the uploader RPC server held by this service.
 *
 * @return the current value of the {@code server} field
 */
@VisibleForTesting
protected Server getServer() {
  return this.server;
}
}
Loading