Commit 75086bc

Merge branch 'master' into add-pyspark-tests

2 parents 90cca8a + e67eaf8

File tree: 7 files changed, +251 −64 lines

cli/dcos_spark/version.py

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-version = 'SNAPSHOT'
+version = '0.5.19'

conf/spark-env.sh

Lines changed: 8 additions & 1 deletion

@@ -7,17 +7,24 @@
 # moves those config files into the standard directory. In DCOS, the
 # CLI reads the "SPARK_HDFS_CONFIG_URL" marathon label in order to set
 # spark.mesos.uris
+
 mkdir -p "${HADOOP_CONF_DIR}"
 [ -f "${MESOS_SANDBOX}/hdfs-site.xml" ] && cp "${MESOS_SANDBOX}/hdfs-site.xml" "${HADOOP_CONF_DIR}"
 [ -f "${MESOS_SANDBOX}/core-site.xml" ] && cp "${MESOS_SANDBOX}/core-site.xml" "${HADOOP_CONF_DIR}"
 
-MESOS_NATIVE_JAVA_LIBRARY=/usr/local/lib/libmesos.so
+cd $MESOS_SANDBOX
+
+MESOS_NATIVE_JAVA_LIBRARY=/usr/lib/libmesos.so
 
 # Support environments without DNS
 if [ -n "$LIBPROCESS_IP" ]; then
     SPARK_LOCAL_IP=${LIBPROCESS_IP}
 fi
 
+# I first set this to MESOS_SANDBOX as a workaround for MESOS-5866,
+# but that now fails due to MESOS-6391, so I'm setting it to /tmp.
+MESOS_DIRECTORY=/tmp
+
 # Options read when launching programs locally with
 # ./bin/run-example or ./bin/spark-submit
 # - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
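
Note: the hard-coded library path above must match the base image, which this commit also changes (from /usr/local/lib to /usr/lib; see the Dockerfile diff below). As a hedged sketch, not part of the commit, a lookup like the following would tolerate either location; the candidate list is an assumption for illustration:

```
# Sketch only: pick whichever libmesos.so the base image actually ships.
for candidate in /usr/lib/libmesos.so /usr/local/lib/libmesos.so; do
  if [ -f "$candidate" ]; then
    MESOS_NATIVE_JAVA_LIBRARY="$candidate"
    break
  fi
done
export MESOS_NATIVE_JAVA_LIBRARY
```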

docker/Dockerfile

Lines changed: 16 additions & 6 deletions

@@ -18,7 +18,7 @@
 # docker build -t spark:git-`git rev-parse --short HEAD` .
 
 # Basing from Mesos image so the Mesos native library is present.
-FROM mesosphere/mesos-modules-private:git-e348e3f
+FROM mesosphere/mesos-modules-private:dcos-ee-mesos-modules-1.8.5-rc2
 MAINTAINER Michael Gummelt <mgummelt@mesosphere.io>
 
 # Set environment variables.
@@ -27,24 +27,25 @@ ENV DEBCONF_NONINTERACTIVE_SEEN "true"
 
 # Upgrade package index and install basic commands.
 RUN apt-get update && \
-    apt-get install -y software-properties-common runit nginx
+    apt-get install -y \
+      software-properties-common \
+      runit \
+      nginx
+
 RUN add-apt-repository ppa:openjdk-r/ppa
 RUN apt-get update && \
     apt-get install -y openjdk-8-jdk curl
 
 ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
-ENV MESOS_NATIVE_JAVA_LIBRARY /usr/local/lib/libmesos.so
+ENV MESOS_NATIVE_JAVA_LIBRARY /usr/lib/libmesos.so
 ENV HADOOP_CONF_DIR /etc/hadoop
 
 RUN mkdir /etc/hadoop
 
-ADD dist /opt/spark/dist
 ADD runit/service /var/lib/runit/service
 ADD runit/init.sh /sbin/init.sh
 ADD nginx /etc/nginx
 
-#RUN ln -sf /usr/lib/libmesos.so /usr/lib/libmesos-0.23.1.so
-
 # The following symlinks are hacks to make spark-class work under the
 # restricted PATH (/usr/bin) set by the DCOS
 # --executor-environment-variables option
@@ -55,4 +56,13 @@ RUN ln -s /bin/grep /usr/bin/grep
 RUN ln -s /var/lib/runit/service/spark /etc/service/spark
 RUN ln -s /var/lib/runit/service/nginx /etc/service/nginx
 
+RUN chmod -R ugo+rw /etc/nginx
+RUN chmod -R ugo+rw /etc/service
+RUN chmod -R ugo+rw /var/lib/
+RUN chmod -R ugo+rw /var/run/
+RUN chmod -R ugo+rw /var/log/
+
+ADD dist /opt/spark/dist
+RUN chmod -R ugo+rw /opt/spark/dist
+
 WORKDIR /opt/spark/dist
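
Note: the new `chmod -R ugo+rw` layers make the state directories writable for unprivileged users (the docs below install the service with `"user": "nobody"`), and moving `ADD dist` to the end keeps the frequently rebuilt `dist` directory in the last layer so the earlier layers stay cached. A hedged usage sketch; the image tag and the probe command are assumptions, not from the commit:

```
# Build, then check that the image is usable by a non-root user such as "nobody".
docker build -t spark-dispatcher:dev .
docker run --rm --user nobody spark-dispatcher:dev \
  sh -c 'touch /var/log/probe && echo "writable as $(id -un)"'
```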

docker/runit/service/spark/run

Lines changed: 68 additions & 49 deletions

@@ -4,64 +4,83 @@ set -x
 
 exec 2>&1
 
-export APPLICATION_WEB_PROXY_BASE="${DISPATCHER_UI_WEB_PROXY_BASE}"
-
-cd /opt/spark/dist
-
-export SPARK_DAEMON_JAVA_OPTS=""
-if [ "${DCOS_SERVICE_NAME}" != "spark" ]; then
-    export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.deploy.zookeeper.dir=/spark_mesos_dispatcher_${DCOS_SERVICE_NAME}"
-fi
-
-if [ "$SPARK_DISPATCHER_MESOS_ROLE" != "" ]; then
-    export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.mesos.role=$SPARK_DISPATCHER_MESOS_ROLE"
-fi
-
-if [ "$SPARK_DISPATCHER_MESOS_PRINCIPAL" != "" ]; then
-    export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.mesos.principal=$SPARK_DISPATCHER_MESOS_PRINCIPAL"
-fi
-
-if [ "$SPARK_DISPATCHER_MESOS_SECRET" != "" ]; then
-    export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.mesos.secret=$SPARK_DISPATCHER_MESOS_SECRET"
-fi
-
-
-
-HISTORY_SERVER_CONF=""
-if [ "${ENABLE_HISTORY_SERVER:=false}" = "true" ]; then
-    HISTORY_SERVER_CONF="spark.mesos.historyServer.url=${HISTORY_SERVER_WEB_PROXY_BASE}"
-fi
-
-sed "s,<HISTORY_SERVER_CONF>,${HISTORY_SERVER_CONF}," \
-    conf/mesos-cluster-dispatcher.properties.template >conf/mesos-cluster-dispatcher.properties
+function export_daemon_opts() {
+    export SPARK_DAEMON_JAVA_OPTS=""
+    if [ "${DCOS_SERVICE_NAME}" != "spark" ]; then
+        export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.deploy.zookeeper.dir=/spark_mesos_dispatcher_${DCOS_SERVICE_NAME}"
+    fi
+
+    if [ "$SPARK_DISPATCHER_MESOS_ROLE" != "" ]; then
+        export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.mesos.role=$SPARK_DISPATCHER_MESOS_ROLE"
+    fi
+
+    if [ "$SPARK_DISPATCHER_MESOS_PRINCIPAL" != "" ]; then
+        export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.mesos.principal=$SPARK_DISPATCHER_MESOS_PRINCIPAL"
+    fi
+
+    if [ "$SPARK_DISPATCHER_MESOS_SECRET" != "" ]; then
+        export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.mesos.secret=$SPARK_DISPATCHER_MESOS_SECRET"
+    fi
+}
 
-sed "s,<LOG_LEVEL>,${SPARK_LOG_LEVEL}," \
-    conf/log4j.properties.template >conf/log4j.properties
+function set_log_level() {
+    sed "s,<LOG_LEVEL>,${SPARK_LOG_LEVEL}," \
+        /opt/spark/dist/conf/log4j.properties.template >/opt/spark/dist/conf/log4j.properties
+}
 
 function add_if_non_empty() {
     if [ -n "$2" ]; then
-        echo "$1=$2" >> conf/mesos-cluster-dispatcher.properties
+        echo "$1=$2" >> /opt/spark/dist/conf/mesos-cluster-dispatcher.properties
     fi
 }
 
-if [ "${SPARK_SSL_KEYSTOREBASE64}" != "" ]; then
-    echo "${SPARK_SSL_KEYSTOREBASE64}" | base64 -d > /tmp/dispatcher-keystore.jks
-    add_if_non_empty spark.ssl.keyStore /tmp/dispatcher-keystore.jks
-fi
+function configure_properties() {
+    HISTORY_SERVER_CONF=""
+    if [ "${ENABLE_HISTORY_SERVER:=false}" = "true" ]; then
+        HISTORY_SERVER_CONF="spark.mesos.historyServer.url=${HISTORY_SERVER_WEB_PROXY_BASE}"
+    fi
+
+    sed "s,<HISTORY_SERVER_CONF>,${HISTORY_SERVER_CONF}," \
+        /opt/spark/dist/conf/mesos-cluster-dispatcher.properties.template >/opt/spark/dist/conf/mesos-cluster-dispatcher.properties
+
+    if [ "${SPARK_SSL_KEYSTOREBASE64}" != "" ]; then
+        echo "${SPARK_SSL_KEYSTOREBASE64}" | base64 -d > /tmp/dispatcher-keystore.jks
+        add_if_non_empty spark.ssl.keyStore /tmp/dispatcher-keystore.jks
+    fi
+
+    if [ "${SPARK_SSL_TRUSTSTOREBASE64}" != "" ]; then
+        echo "${SPARK_SSL_TRUSTSTOREBASE64}" | base64 -d > /tmp/dispatcher-truststore.jks
+        add_if_non_empty spark.ssl.trustStore /tmp/dispatcher-truststore.jks
+    fi
+
+    add_if_non_empty spark.ssl.enabled "${SPARK_SSL_ENABLED}"
+    add_if_non_empty spark.ssl.keyPassword "${SPARK_SSL_KEYPASSWORD}"
+    add_if_non_empty spark.ssl.keyStorePassword "${SPARK_SSL_KEYSTOREPASSWORD}"
+    add_if_non_empty spark.ssl.trustStorePassword "${SPARK_SSL_TRUSTSTOREPASSWORD}"
+    add_if_non_empty spark.ssl.protocol "${SPARK_SSL_PROTOCOL}"
+    add_if_non_empty spark.ssl.enabledAlgorithms "${SPARK_SSL_ENABLEDALGORITHMS}"
+
+    # write defaults
+    if [ "${DCOS_SERVICE_ACCOUNT_CREDENTIAL}" != "" ]; then
+        # write defaults using both property names, since 2.0 uses one and 2.1 uses the other
+        echo "spark.mesos.dispatcher.driverDefault.spark.mesos.driverEnv.MESOS_MODULES=file:///opt/mesosphere/etc/mesos-scheduler-modules/dcos_authenticatee_module.json" >> /opt/spark/dist/conf/mesos-cluster-dispatcher.properties
+        echo "spark.mesos.cluster.taskProperty.spark.mesos.driverEnv.MESOS_MODULES=file:///opt/mesosphere/etc/mesos-scheduler-modules/dcos_authenticatee_module.json" >> /opt/spark/dist/conf/mesos-cluster-dispatcher.properties
+
+        echo "spark.mesos.dispatcher.driverDefault.spark.mesos.driverEnv.MESOS_AUTHENTICATEE=com_mesosphere_dcos_ClassicRPCAuthenticatee" >> /opt/spark/dist/conf/mesos-cluster-dispatcher.properties
+        echo "spark.mesos.cluster.taskProperty.spark.mesos.driverEnv.MESOS_AUTHENTICATEE=com_mesosphere_dcos_ClassicRPCAuthenticatee" >> /opt/spark/dist/conf/mesos-cluster-dispatcher.properties
+
+        echo "spark.mesos.dispatcher.driverDefault.spark.mesos.principal=${SPARK_DISPATCHER_MESOS_PRINCIPAL}" >> /opt/spark/dist/conf/mesos-cluster-dispatcher.properties
+        echo "spark.mesos.cluster.taskProperty.spark.mesos.principal=${SPARK_DISPATCHER_MESOS_PRINCIPAL}" >> /opt/spark/dist/conf/mesos-cluster-dispatcher.properties
+    fi
+}
 
-if [ "${SPARK_SSL_TRUSTSTOREBASE64}" != "" ]; then
-    echo "${SPARK_SSL_TRUSTSTOREBASE64}" | base64 -d > /tmp/dispatcher-truststore.jks
-    add_if_non_empty spark.ssl.trustStore /tmp/dispatcher-truststore.jks
-fi
 
-add_if_non_empty spark.ssl.enabled "${SPARK_SSL_ENABLED}"
-add_if_non_empty spark.ssl.keyPassword "${SPARK_SSL_KEYPASSWORD}"
-add_if_non_empty spark.ssl.keyStorePassword "${SPARK_SSL_KEYSTOREPASSWORD}"
-add_if_non_empty spark.ssl.trustStorePassword "${SPARK_SSL_TRUSTSTOREPASSWORD}"
-add_if_non_empty spark.ssl.protocol "${SPARK_SSL_PROTOCOL}"
-add_if_non_empty spark.ssl.enabledAlgorithms "${SPARK_SSL_ENABLEDALGORITHMS}"
+export APPLICATION_WEB_PROXY_BASE="${DISPATCHER_UI_WEB_PROXY_BASE}"
+set_log_level
+export_daemon_opts
+configure_properties
+ZK="master.mesos:2181"
 
-export ZK="master.mesos:2181"
 exec /opt/spark/dist/bin/spark-class \
     org.apache.spark.deploy.mesos.MesosClusterDispatcher \
     --port "${DISPATCHER_PORT}" \
@@ -70,4 +89,4 @@ exec /opt/spark/dist/bin/spark-class \
     --zk "${ZK}" \
     --host "${HOST}" \
     --name "${DCOS_SERVICE_NAME}" \
-    --properties-file "conf/mesos-cluster-dispatcher.properties"
+    --properties-file "/opt/spark/dist/conf/mesos-cluster-dispatcher.properties"
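
Note: with the setup logic factored into functions, its effect is easy to inspect in isolation. A hedged sketch of what `export_daemon_opts` yields for a non-default service name with a role and principal set; all values below are assumptions for illustration, and the function is taken from the script above:

```
# Assumes export_daemon_opts from docker/runit/service/spark/run is sourced.
export DCOS_SERVICE_NAME="spark-team-a"
export SPARK_DISPATCHER_MESOS_ROLE="spark-role"
export SPARK_DISPATCHER_MESOS_PRINCIPAL="spark-principal"
export_daemon_opts
echo "$SPARK_DAEMON_JAVA_OPTS"
# -> -Dspark.deploy.zookeeper.dir=/spark_mesos_dispatcher_spark-team-a
#    -Dspark.mesos.role=spark-role -Dspark.mesos.principal=spark-principal
```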

docs/user-docs.md

Lines changed: 108 additions & 1 deletion

@@ -276,7 +276,114 @@ to the history server entry for that job.
 
 <a name="ssl"></a>
 
-### SSL
+### Security
+
+#### Mesos
+
+##### SSL
+
+<table class="table">
+<tr>
+  <td>
+    `security.mesos.ssl.enabled`
+  </td>
+  <td>
+    Set to true to enable SSL on Mesos communication (default: false).
+  </td>
+</tr>
+</table>
+
+##### Authentication
+
+When running in
+[DC/OS strict security mode](https://docs.mesosphere.com/latest/administration/id-and-access-mgt/),
+both the dispatcher and jobs must authenticate to Mesos using a
+[DC/OS Service Account](https://docs.mesosphere.com/1.8/administration/id-and-access-mgt/service-auth/).
+Follow these instructions to authenticate in strict mode:
+
+1. Create a Service Account
+
+   Instructions
+   [here](https://docs.mesosphere.com/1.8/administration/id-and-access-mgt/service-auth/universe-service-auth/).
+
+2. Assign Permissions
+
+   First, allow Spark to run tasks as root:
+
+   ```
+   $ curl -k -L -X PUT \
+     -H "Authorization: token=$(dcos config show core.dcos_acs_token)" \
+     "$(dcos config show core.dcos_url)/acs/api/v1/acls/dcos:mesos:master:task:user:root" \
+     -d '{"description":"Allows root to execute tasks"}' \
+     -H 'Content-Type: application/json'
+
+   $ curl -k -L -X PUT \
+     -H "Authorization: token=$(dcos config show core.dcos_acs_token)" \
+     "$(dcos config show core.dcos_url)/acs/api/v1/acls/dcos:mesos:master:task:user:root/users/${SERVICE_ACCOUNT_NAME}/create"
+   ```
+
+   Now you must allow Spark to register under the desired role. This is
+   the value used for `service.role` when installing Spark (default:
+   `*`):
+
+   ```
+   $ export ROLE=<service.role value>
+   $ curl -k -L -X PUT \
+     -H "Authorization: token=$(dcos config show core.dcos_acs_token)" \
+     "$(dcos config show core.dcos_url)/acs/api/v1/acls/dcos:mesos:master:framework:role:${ROLE}" \
+     -d "{\"description\":\"Allows ${ROLE} to register as a framework with the Mesos master\"}" \
+     -H 'Content-Type: application/json'
+
+   $ curl -k -L -X PUT \
+     -H "Authorization: token=$(dcos config show core.dcos_acs_token)" \
+     "$(dcos config show core.dcos_url)/acs/api/v1/acls/dcos:mesos:master:framework:role:${ROLE}/users/${SERVICE_ACCOUNT_NAME}/create"
+   ```
+
+3. Install Spark
+
+   ```
+   $ dcos package install spark --options=config.json
+   ```
+
+   Where `config.json` contains the following JSON. Replace
+   `<principal>` with the name of your service account, and
+   `<secret_name>` with the name of the DC/OS secret containing your
+   service account's private key. These values were created in Step 1
+   above.
+
+   ```
+   {
+     "service": {
+       "principal": "<principal>",
+       "user": "nobody"
+     },
+     "security": {
+       "mesos": {
+         "authentication": {
+           "secret_name": "<secret_name>"
+         }
+       }
+     }
+   }
+   ```
+
+4. Submit a Job
+
+   We've now installed the Spark Dispatcher, which authenticates
+   itself to the Mesos master. Spark jobs are themselves frameworks
+   that must also authenticate. The dispatcher will pass the secret
+   along to the jobs, so all that's left is to configure our jobs to
+   use DC/OS authentication:
+
+   ```
+   $ PROPS="-Dspark.mesos.driverEnv.MESOS_MODULES=file:///opt/mesosphere/etc/mesos-scheduler-modules/dcos_authenticatee_module.json "
+   $ PROPS+="-Dspark.mesos.driverEnv.MESOS_AUTHENTICATEE=com_mesosphere_dcos_ClassicRPCAuthenticatee "
+   $ PROPS+="-Dspark.mesos.principal=<principal>"
+   $ dcos spark run --submit-args="${PROPS} ..."
+   ```
+
+#### Spark SSL
 
 SSL support in DC/OS Spark encrypts the following channels:
 
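Note: the new docs describe `security.mesos.ssl.enabled` without an install-time example. A hedged sketch follows, assuming the option maps into the package options file the same way the `security.mesos.authentication` block from Step 3 does; the nesting is an assumption, not confirmed by this commit:

```
$ cat >options.json <<'EOF'
{
  "security": {
    "mesos": {
      "ssl": {
        "enabled": true
      }
    }
  }
}
EOF
$ dcos package install spark --options=options.json
```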

package/config.json

Lines changed: 16 additions & 0 deletions

@@ -116,6 +116,22 @@
         }
       }
     },
+    "mesos": {
+      "description": "Mesos scheduler configuration properties.",
+      "type": "object",
+      "properties": {
+        "authentication": {
+          "description": "Mesos scheduler dcos-oauth configuration.",
+          "type": "object",
+          "properties": {
+            "secret_name": {
+              "description": "Name of the secret used to authenticate with the Mesos Master.",
+              "type": "string"
+            }
+          }
+        }
+      }
+    },
     "ssl": {
       "description": "Spark SSL certificates and private key configuration.",
       "type": "object",

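Note: a minimal options file exercising just this new schema block might look as follows; the secret path is a placeholder for illustration, not a value from the commit:

```
$ cat >options.json <<'EOF'
{
  "security": {
    "mesos": {
      "authentication": {
        "secret_name": "spark/service-account-secret"
      }
    }
  }
}
EOF
```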