Skip to content

Commit 772d064

Browse files
authored
[SPARK-556] Kerberos support in history server (apache#233)
* Kerberos support in history server. Added kerberos config: krb5conf, principal, keytab secret path, and updated marathon.json to use them. * Build the history server stub universe in the Makefile, use a fixture to add stub repos. * Adding history server to tests * Fixed the adding of stub universes. Added a job that logs to the history server. Test passes. * Add configure_universe as a dependency * Updated history server docs. * Fixed the Makefile, made the user configurable, made the default user "nobody". * Made the keytab path configurable * Made spark-history package/service name in tests configurable from env var.
1 parent 9be6d18 commit 772d064

File tree

13 files changed

+227
-43
lines changed

13 files changed

+227
-43
lines changed

Makefile

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ $(CLI_DIST_DIR):
112112
cli: $(CLI_DIST_DIR)
113113

114114
UNIVERSE_URL_PATH ?= stub-universe-url
115+
HISTORY_URL_PATH := $(UNIVERSE_URL_PATH).history
115116
$(UNIVERSE_URL_PATH): $(CLI_DIST_DIR) docker-dist
116117
UNIVERSE_URL_PATH=$(UNIVERSE_URL_PATH) \
117118
TEMPLATE_CLI_VERSION=$(CLI_VERSION) \
@@ -122,7 +123,15 @@ $(UNIVERSE_URL_PATH): $(CLI_DIST_DIR) docker-dist
122123
$(CLI_DIST_DIR)/dcos-spark-darwin \
123124
$(CLI_DIST_DIR)/dcos-spark-linux \
124125
$(CLI_DIST_DIR)/dcos-spark.exe \
125-
$(CLI_DIST_DIR)/*.whl;
126+
$(CLI_DIST_DIR)/*.whl; \
127+
UNIVERSE_URL_PATH=$(HISTORY_URL_PATH) \
128+
TEMPLATE_DEFAULT_DOCKER_IMAGE=`cat docker-dist` \
129+
$(TOOLS_DIR)/publish_aws.py \
130+
spark-history \
131+
$(ROOT_DIR)/history/package/; \
132+
cat $(HISTORY_URL_PATH) >> $(UNIVERSE_URL_PATH);
133+
134+
stub-universe: $(UNIVERSE_URL_PATH)
126135

127136
DCOS_SPARK_TEST_JAR_PATH ?= $(ROOT_DIR)/dcos-spark-scala-tests-assembly-0.1-SNAPSHOT.jar
128137
$(DCOS_SPARK_TEST_JAR_PATH):
@@ -176,8 +185,8 @@ test: test-env $(DCOS_SPARK_TEST_JAR_PATH) $(MESOS_SPARK_TEST_JAR_PATH) $(UNIVER
176185
fi; \
177186
fi; \
178187
export CLUSTER_URL=`cat cluster-url`
179-
$(TOOLS_DIR)/./dcos_login.py
180-
dcos package repo add --index=0 spark-aws `cat stub-universe-url`
188+
$(TOOLS_DIR)/./dcos_login.py; \
189+
export STUB_UNIVERSE_URL=`cat $(UNIVERSE_URL_PATH)`; \
181190
SCALA_TEST_JAR_PATH=$(DCOS_SPARK_TEST_JAR_PATH) \
182191
TEST_JAR_PATH=$(MESOS_SPARK_TEST_JAR_PATH) \
183192
S3_BUCKET=$(S3_BUCKET) \
@@ -187,7 +196,7 @@ test: test-env $(DCOS_SPARK_TEST_JAR_PATH) $(MESOS_SPARK_TEST_JAR_PATH) $(UNIVER
187196
clean: clean-dist clean-cluster
188197
rm -rf test-env
189198
rm -rf $(CLI_DIST_DIR)
190-
for f in "$(MESOS_SPARK_TEST_JAR_PATH)" "$(DCOS_SPARK_TEST_JAR_PATH)" "cluster-url" "$(UNIVERSE_URL_PATH)" "docker-build" "docker-dist" ; do \
199+
for f in "$(MESOS_SPARK_TEST_JAR_PATH)" "$(DCOS_SPARK_TEST_JAR_PATH)" "cluster-url" "$(UNIVERSE_URL_PATH)" "$(HISTORY_URL_PATH)" "docker-build" "docker-dist" ; do \
191200
[ ! -e $$f ] || rm $$f; \
192201
done; \
193202

docs/history-server.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ DC/OS Apache Spark includes The [Spark History Server][3]. Because the history s
2020
1. Create `spark-history-options.json`:
2121

2222
{
23-
"hdfs-config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
23+
"service": {
24+
"hdfs-config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
25+
}
2426
}
2527

2628
1. Install The Spark History Server:

docs/kerberos.md

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,20 @@ Spark (and all Kerberos-enabled) components need a valid `krb5.conf` file. You ca
5252
5353
1. Make sure your keytab is accessible from the DC/OS [Secret Store](https://docs.mesosphere.com/latest/security/secrets/).
5454

55-
1. If you've enabled the history server via `history-server.enabled`, you must also configure the principal and keytab for the history server. **WARNING**: The keytab contains secrets, in the current history server package the keytab is not stored securely. See [Limitations][9]
55+
1. If you are using the history server, you must also configure the `krb5.conf`, principal, and keytab
56+
for the history server.
5657

57-
Base64 encode your keytab:
58-
59-
cat spark.keytab | base64
60-
61-
And add the following to your configuration file:
58+
Add the Kerberos configurations to your spark-history JSON configuration file:
6259

6360
{
64-
"history-server": {
61+
"service": {
62+
"hdfs-config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
63+
},
64+
"security": {
6565
"kerberos": {
66-
"principal": "spark@REALM",
67-
"keytab": "<base64 encoding>"
66+
"krb5conf": "<base64_encoding>",
67+
"principal": "<Kerberos principal>", # e.g. spark@REALM
68+
"keytab": "<keytab secret path>" # e.g. __dcos_base64__hdfs_keytab
6869
}
6970
}
7071
}
@@ -87,7 +88,7 @@ Submit the job with the keytab:
8788
Submit the job with the ticket:
8889

8990
dcos spark run --submit-args="\
90-
--kerberos-principal hdfs/name-0-node.hdfs.autoip.dcos.thisdcos.directory@LOCAL \
91+
--kerberos-principal user@REALM \
9192
--tgt-secret-path /__dcos_base64__tgt \
9293
--conf ... --class MySparkJob <url> <args>"
9394

history/bin/universe.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ set -e -x -o pipefail
44

55
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
66
HISTORY_DIR="${DIR}/.."
7+
TOOLS_DIR="${DIR}/../../tools"
78

89
function check_env {
910
if [ -z "${DOCKER_IMAGE}" ]; then
@@ -15,7 +16,7 @@ function check_env {
1516

1617
function make_universe {
1718
TEMPLATE_DEFAULT_DOCKER_IMAGE=${DOCKER_IMAGE} \
18-
${COMMONS_DIR}/tools/ci_upload.py \
19+
${TOOLS_DIR}/publish_aws.py \
1920
spark-history \
2021
${HISTORY_DIR}/package
2122
}

history/package/config.json

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
"user": {
3131
"description": "OS user",
3232
"type": "string",
33-
"default": "root"
33+
"default": "nobody"
3434
},
3535
"docker-image": {
3636
"description": "Docker image to run in. See https://hub.docker.com/r/mesosphere/spark/tags/ for options.",
@@ -63,6 +63,33 @@
6363
}
6464
},
6565
"required": ["hdfs-config-url"]
66+
},
67+
"security": {
68+
"description": "Security configuration properties",
69+
"type": "object",
70+
"properties": {
71+
"kerberos": {
72+
"description": "Kerberos configuration.",
73+
"type": "object",
74+
"properties": {
75+
"krb5conf": {
76+
"description": "Base64 encoded krb5.conf file to access your KDC.",
77+
"type": "string",
78+
"default": ""
79+
},
80+
"principal": {
81+
"description": "Kerberos principal.",
82+
"default": "",
83+
"type": "string"
84+
},
85+
"keytab": {
86+
"description": "Keytab path in the secret store.",
87+
"default": "",
88+
"type": "string"
89+
}
90+
}
91+
}
92+
}
6693
}
6794
},
6895
"required": ["service"]

history/package/marathon.json.mustache

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,60 @@
22
"id": "{{service.name}}",
33
"cpus": {{service.cpus}},
44
"mem": {{service.mem}},
5+
{{^security.kerberos.principal}}
56
"cmd": "SPARK_HISTORY_OPTS=\"-Dspark.history.ui.port=${PORT0} ${SPARK_HISTORY_OPTS}\" ./bin/spark-class org.apache.spark.deploy.history.HistoryServer",
7+
{{/security.kerberos.principal}}
8+
{{#security.kerberos.principal}}
9+
"cmd": "SPARK_HISTORY_OPTS=\"-Dspark.history.ui.port=${PORT0} ${SPARK_HISTORY_OPTS} ${SPARK_HISTORY_KERBEROS_OPTS}\" ./bin/spark-class org.apache.spark.deploy.history.HistoryServer",
10+
{{/security.kerberos.principal}}
11+
"user": "{{service.user}}",
612
"env": {
13+
{{#security.kerberos.krb5conf}}
14+
"KRB5_CONFIG_BASE64": "{{security.kerberos.krb5conf}}",
15+
{{/security.kerberos.krb5conf}}
716
"SPARK_USER": "{{service.user}}",
817
"APPLICATION_WEB_PROXY_BASE": "/service/{{service.name}}",
18+
{{#security.kerberos.principal}}
19+
"SPARK_HISTORY_KERBEROS_OPTS": "-Dspark.history.kerberos.enabled=true -Dspark.history.kerberos.principal={{security.kerberos.principal}} -Dspark.history.kerberos.keytab=/opt/spark/hdfs.keytab",
20+
{{/security.kerberos.principal}}
921
"SPARK_HISTORY_OPTS": "-Dspark.history.fs.logDirectory={{service.log-dir}} -Dspark.history.fs.cleaner.enabled={{service.cleaner.enabled}} -Dspark.history.fs.cleaner.interval={{service.cleaner.interval}} -Dspark.history.fs.cleaner.maxAge={{service.cleaner.max-age}}"
1022
},
1123
"ports": [0],
1224
"container": {
13-
"type": "DOCKER",
25+
"type": "MESOS",
1426
"docker": {
1527
"image": "{{service.docker-image}}",
16-
"network": "HOST",
17-
"forcePullImage": true,
18-
"parameters": [
19-
{
20-
"key": "user",
21-
"value": "{{service.user}}"
22-
}
23-
]
28+
"forcePullImage": true
2429
}
30+
{{#security.kerberos.keytab}}
31+
,
32+
"volumes": [
33+
{
34+
"containerPath": "/opt/spark/hdfs.keytab",
35+
"secret": "hdfs_keytab",
36+
"hostPath": ""
37+
}
38+
]
39+
{{/security.kerberos.keytab}}
2540
},
41+
{{#security.kerberos.keytab}}
42+
"secrets": {
43+
"hdfs_keytab": {
44+
"source": "{{security.kerberos.keytab}}"
45+
}
46+
},
47+
{{/security.kerberos.keytab}}
48+
"healthChecks": [
49+
{
50+
"portIndex": 0,
51+
"protocol": "MESOS_HTTP",
52+
"path": "/",
53+
"gracePeriodSeconds": 5,
54+
"intervalSeconds": 60,
55+
"timeoutSeconds": 10,
56+
"maxConsecutiveFailures": 3
57+
}
58+
],
2659
"labels": {
2760
"DCOS_SERVICE_NAME": "{{service.name}}",
2861
"DCOS_SERVICE_PORT_INDEX": "0",

history/package/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,6 @@
1919
"analytics"
2020
],
2121
"website": "https://docs.mesosphere.com/service-docs/spark/",
22-
"version": "2.1.0-1",
22+
"version": "{{package-version}}",
2323
"minDcosReleaseVersion": "1.8"
2424
}

tests/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import pytest
2+
import sdk_repository
3+
4+
5+
@pytest.fixture(scope='session')
6+
def configure_universe():
7+
yield from sdk_repository.universe_session()

tests/resources/hdfsclient.json

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{
2+
"id": "hdfsclient",
3+
"mem": 1024,
4+
"user": "nobody",
5+
"container": {
6+
"type": "MESOS",
7+
"docker": {
8+
"image": "nvaziri/hdfs-client:dev",
9+
"forcePullImage": true
10+
},
11+
"volumes": [
12+
{
13+
"containerPath": "/hadoop-2.6.0-cdh5.9.1/hdfs.keytab",
14+
"secret": "hdfs_keytab",
15+
"hostPath": ""
16+
}
17+
]
18+
},
19+
"secrets": {
20+
"hdfs_keytab": {
21+
"source": "__dcos_base64___keytab"
22+
}
23+
},
24+
"networks": [
25+
{
26+
"mode": "host"
27+
}
28+
],
29+
"env": {
30+
"REALM": "LOCAL",
31+
"KDC_ADDRESS": "kdc.marathon.autoip.dcos.thisdcos.directory:2500",
32+
"JAVA_HOME": "/usr/lib/jvm/default-java",
33+
"KRB5_CONFIG": "/etc/krb5.conf",
34+
"HDFS_SERVICE_NAME": "hdfs"
35+
}
36+
}

0 commit comments

Comments
 (0)