
Commit 65068ba

[EZSPA-212] Move creating of spark-env.sh script to Spark (apache#848)
Co-authored-by: Egor Krivokon <>
1 parent 7520f7f

1 file changed: +172 -0

conf/spark-env.sh

Lines changed: 172 additions & 0 deletions
@@ -0,0 +1,172 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.

# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
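#
# For instance, to point local runs of ./bin/spark-submit at an existing Hadoop
# configuration and pin the bind address, one might uncomment and adapt the
# following (the path and IP below are purely illustrative):
# export HADOOP_CONF_DIR=/etc/hadoop/conf
# export SPARK_LOCAL_IP=192.168.1.10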

# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
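#
# For example, to spread shuffle and RDD spill data across two local disks,
# one might set (directories below are illustrative):
# export SPARK_LOCAL_DIRS=/data1/spark-local,/data2/spark-local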

# Options read in YARN client/cluster mode
# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN
# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
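#
# A minimal YARN sizing sketch; the numbers are illustrative, not recommendations:
# export SPARK_EXECUTOR_CORES=2
# export SPARK_EXECUTOR_MEMORY=4G
# export SPARK_DRIVER_MEMORY=2G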

# Options for the daemons used in the standalone deploy mode
# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
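#
# For a standalone worker one might cap the resources it offers to executors,
# e.g. (values below are illustrative):
# export SPARK_WORKER_CORES=8
# export SPARK_WORKER_MEMORY=32g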

# Options for launcher
# - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. "-Dx=y")
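#
# For example, to give the launcher extra heap (the size is illustrative):
# export SPARK_LAUNCHER_OPTS="-Xmx1g"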

# Generic options for the daemons used in the standalone deploy mode
# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - SPARK_LOG_DIR       Where log files are stored. (Default: ${SPARK_HOME}/logs)
# - SPARK_LOG_MAX_FILES Max log files of Spark daemons can rotate to. Default is 5.
# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
# - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
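#
# For example, to keep daemon logs and pid files off /tmp, one might set
# (directories below are illustrative):
# export SPARK_LOG_DIR=/var/log/spark
# export SPARK_PID_DIR=/var/run/spark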
# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
# You might get better performance by enabling these options if you use native BLAS (see SPARK-21305).
# - MKL_NUM_THREADS=1        Disable multi-threading of Intel MKL
# - OPENBLAS_NUM_THREADS=1   Disable multi-threading of OpenBLAS
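#
# For example, when Spark links against a native BLAS, both libraries can be
# pinned to a single thread as the note above suggests:
# export MKL_NUM_THREADS=1
# export OPENBLAS_NUM_THREADS=1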

#########################################################################################################
# Set MapR attributes and compute classpath
#########################################################################################################

MAPR_HOME="${MAPR_HOME:-/opt/mapr}"
SPARK_VERSION=$(cat ${MAPR_HOME}/spark/sparkversion)
SPARK_HOME=${MAPR_HOME}/spark/spark-${SPARK_VERSION}

# Set the spark attributes
if [ -d "${SPARK_HOME}" ]; then
  export SPARK_HOME=${SPARK_HOME}
fi
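# As a concrete illustration: if ${MAPR_HOME}/spark/sparkversion contains 3.3.2
# (a hypothetical version), SPARK_HOME above resolves to /opt/mapr/spark/spark-3.3.2
# under the default MAPR_HOME of /opt/mapr.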

# Load the hadoop version attributes
source ${SPARK_HOME}/mapr-util/hadoop-version-picker.sh
export HADOOP_HOME=$hadoop_home_dir
export HADOOP_CONF_DIR=$hadoop_conf_dir

# Enable mapr impersonation
export MAPR_IMPERSONATION_ENABLED=1

MAPR_HADOOP_CLASSPATH=`${SPARK_HOME}/bin/mapr-classpath.sh`
MAPR_HADOOP_JNI_PATH=`hadoop jnipath`
MAPR_SPARK_CLASSPATH="$MAPR_HADOOP_CLASSPATH"

# FIX for SPARK-906/KAFKA-717. This should be removed once the kafka-eventstreams jar is added to the MapR classpath
if [ -f "$MAPR_HOME/kafka/kafkaversion" ]; then
  MAPR_SPARK_CLASSPATH=$MAPR_SPARK_CLASSPATH:$(find ${BASEMAPR:-$MAPR_HOME}/kafka -name "kafka-eventstreams-*.jar")
fi

SPARK_MAPR_HOME=$MAPR_HOME

export SPARK_LIBRARY_PATH=$MAPR_HADOOP_JNI_PATH
export LD_LIBRARY_PATH="$MAPR_HADOOP_JNI_PATH:$LD_LIBRARY_PATH"

# Load the classpath generator script
source ${SPARK_HOME}/mapr-util/generate-classpath.sh

# Calculate hive jars to include in classpath
generate_compatible_classpath "spark" "${SPARK_VERSION}" "hive"
MAPR_HIVE_CLASSPATH=${generated_classpath}
if [ ! -z "$MAPR_HIVE_CLASSPATH" ]; then
  MAPR_SPARK_CLASSPATH="$MAPR_SPARK_CLASSPATH:$MAPR_HIVE_CLASSPATH"
fi

# Calculate hbase jars to include in classpath
generate_compatible_classpath "spark" "${SPARK_VERSION}" "hbase"
MAPR_HBASE_CLASSPATH=${generated_classpath}
if [ ! -z "$MAPR_HBASE_CLASSPATH" ]; then
  MAPR_SPARK_CLASSPATH="$MAPR_SPARK_CLASSPATH:$MAPR_HBASE_CLASSPATH"
  SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dspark.driver.extraClassPath=$MAPR_HBASE_CLASSPATH"
fi

# Set executor classpath for Mesos. Uncomment the following line if you want to deploy Spark jobs on Mesos.
#MAPR_MESOS_CLASSPATH=$MAPR_SPARK_CLASSPATH
SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dspark.executor.extraClassPath=$MAPR_HBASE_CLASSPATH:$MAPR_MESOS_CLASSPATH"

# Set SPARK_DIST_CLASSPATH
export SPARK_DIST_CLASSPATH=$MAPR_SPARK_CLASSPATH

# Security status
source $MAPR_HOME/conf/env.sh
if [ "$MAPR_SECURITY_STATUS" = "true" ]; then
  SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dhadoop.login=hybrid -Dmapr_sec_enabled=true -Djavax.security.auth.useSubjectCredsOnly=false"
fi

# Scala
export SCALA_VERSION=2.12
export SPARK_SCALA_VERSION=$SCALA_VERSION
export SCALA_HOME=${SPARK_HOME}/scala
export SCALA_LIBRARY_PATH=${SCALA_HOME}/lib

# Use a fixed identifier for pid files
export SPARK_IDENT_STRING="mapr"

#########################################################################################################
# :::CAUTION::: DO NOT EDIT ANYTHING ON OR ABOVE THIS LINE
#########################################################################################################


#
# MASTER HA SETTINGS
#
#export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=<zookeeper1:5181,zookeeper2:5181,..> -Djava.security.auth.login.config=/opt/mapr/conf/mapr.login.conf -Dzookeeper.sasl.client=false"


# MEMORY SETTINGS
export SPARK_DAEMON_MEMORY=1g
export SPARK_WORKER_MEMORY=16g

# Worker Directory
export SPARK_WORKER_DIR=$SPARK_HOME/tmp

# Environment variable for printing the Spark command every time you run Spark. Set to "1" to print.
# export SPARK_PRINT_LAUNCH_COMMAND=1

# UI
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djava.library.path=$SPARK_MAPR_HOME/lib"

0 commit comments
