#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.

# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
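#
# For example, to pin the bind address and advertise a public hostname
# (values are illustrative; uncomment and adjust for your site):
# export SPARK_LOCAL_IP=192.168.1.10
# export SPARK_PUBLIC_DNS=spark-driver.example.com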

# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
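#
# For example, to spread shuffle and RDD data across several local disks
# (paths are illustrative; uncomment and adjust for your site):
# export SPARK_LOCAL_DIRS=/data1/spark,/data2/spark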

# Options read in YARN client/cluster mode
# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN
# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
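#
# For example, a modest sizing for YARN jobs (illustrative values only;
# uncomment and adjust for your workload):
# export SPARK_EXECUTOR_CORES=2
# export SPARK_EXECUTOR_MEMORY=4G
# export SPARK_DRIVER_MEMORY=2G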

# Options for the daemons used in the standalone deploy mode
# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
# - SPARK_PUBLIC_DNS, to set the public DNS name of the master or workers
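#
# For example, to run the master and its web UI on non-default ports
# (illustrative values; uncomment and adjust for your site):
# export SPARK_MASTER_PORT=7078
# export SPARK_MASTER_WEBUI_PORT=8081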

# Options for launcher
# - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. "-Dx=y")
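#
# For example (heap size and property are illustrative; uncomment and adjust
# as needed):
# export SPARK_LAUNCHER_OPTS="-Xmx512m -Dx=y"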

# Generic options for the daemons used in the standalone deploy mode
# - SPARK_CONF_DIR       Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - SPARK_LOG_DIR        Where log files are stored. (Default: ${SPARK_HOME}/logs)
# - SPARK_LOG_MAX_FILES  Max number of rotated log files for Spark daemons. (Default: 5)
# - SPARK_PID_DIR        Where the pid file is stored. (Default: /tmp)
# - SPARK_IDENT_STRING   A string representing this instance of spark. (Default: $USER)
# - SPARK_NICENESS       The scheduling priority for daemons. (Default: 0)
# - SPARK_NO_DAEMONIZE   Run the proposed command in the foreground. It will not output a PID file.
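#
# For example, to keep logs and pid files in dedicated locations
# (paths are illustrative; uncomment and adjust for your site):
# export SPARK_LOG_DIR=/var/log/spark
# export SPARK_PID_DIR=/var/run/spark
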
# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
# You might get better performance by enabling these options if you use native BLAS (see SPARK-21305).
# - MKL_NUM_THREADS=1        Disable multi-threading of Intel MKL
# - OPENBLAS_NUM_THREADS=1   Disable multi-threading of OpenBLAS
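#
# For example, to pin both BLAS implementations to a single thread
# (uncomment only if your jobs actually use native BLAS):
# export MKL_NUM_THREADS=1
# export OPENBLAS_NUM_THREADS=1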

#########################################################################################################
# Set MapR attributes and compute classpath
#########################################################################################################

MAPR_HOME="${MAPR_HOME:-/opt/mapr}"
SPARK_VERSION=$(cat "${MAPR_HOME}/spark/sparkversion")
SPARK_HOME=${MAPR_HOME}/spark/spark-${SPARK_VERSION}
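# For example, if the sparkversion file contains "3.3.2" (an illustrative
# version, not necessarily yours), SPARK_HOME resolves to /opt/mapr/spark/spark-3.3.2.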

# Set the Spark attributes: export SPARK_HOME only if the computed directory exists
if [ -d "${SPARK_HOME}" ]; then
  export SPARK_HOME
fi

# Load the hadoop version attributes
source ${SPARK_HOME}/mapr-util/hadoop-version-picker.sh
export HADOOP_HOME=$hadoop_home_dir
export HADOOP_CONF_DIR=$hadoop_conf_dir

# Enable mapr impersonation
export MAPR_IMPERSONATION_ENABLED=1

MAPR_HADOOP_CLASSPATH=$(${SPARK_HOME}/bin/mapr-classpath.sh)
MAPR_HADOOP_JNI_PATH=$(hadoop jnipath)
MAPR_SPARK_CLASSPATH="$MAPR_HADOOP_CLASSPATH"

# FIX for SPARK-906/KAFKA-717. This should be removed once the kafka-eventstreams
# jar is added to the MapR classpath.
if [ -f "${MAPR_HOME}/kafka/kafkaversion" ]; then
  MAPR_SPARK_CLASSPATH=$MAPR_SPARK_CLASSPATH:$(find "${BASEMAPR:-${MAPR_HOME}}/kafka" -name "kafka-eventstreams-*.jar")
fi

SPARK_MAPR_HOME=$MAPR_HOME

export SPARK_LIBRARY_PATH=$MAPR_HADOOP_JNI_PATH
export LD_LIBRARY_PATH="$MAPR_HADOOP_JNI_PATH:$LD_LIBRARY_PATH"

# Load the classpath generator script
source ${SPARK_HOME}/mapr-util/generate-classpath.sh

# Calculate hive jars to include in classpath
generate_compatible_classpath "spark" "${SPARK_VERSION}" "hive"
MAPR_HIVE_CLASSPATH=${generated_classpath}
if [ -n "$MAPR_HIVE_CLASSPATH" ]; then
  MAPR_SPARK_CLASSPATH="$MAPR_SPARK_CLASSPATH:$MAPR_HIVE_CLASSPATH"
fi

# Calculate hbase jars to include in classpath
generate_compatible_classpath "spark" "${SPARK_VERSION}" "hbase"
MAPR_HBASE_CLASSPATH=${generated_classpath}
if [ -n "$MAPR_HBASE_CLASSPATH" ]; then
  MAPR_SPARK_CLASSPATH="$MAPR_SPARK_CLASSPATH:$MAPR_HBASE_CLASSPATH"
  SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dspark.driver.extraClassPath=$MAPR_HBASE_CLASSPATH"
fi

# Set the executor classpath for Mesos. Uncomment the following line if you
# want to deploy Spark jobs on Mesos.
#MAPR_MESOS_CLASSPATH=$MAPR_SPARK_CLASSPATH
SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dspark.executor.extraClassPath=$MAPR_HBASE_CLASSPATH:$MAPR_MESOS_CLASSPATH"

# Set SPARK_DIST_CLASSPATH
export SPARK_DIST_CLASSPATH=$MAPR_SPARK_CLASSPATH

# Security status
source $MAPR_HOME/conf/env.sh
if [ "$MAPR_SECURITY_STATUS" = "true" ]; then
  SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dhadoop.login=hybrid -Dmapr_sec_enabled=true -Djavax.security.auth.useSubjectCredsOnly=false"
fi

# Scala
export SCALA_VERSION=2.12
export SPARK_SCALA_VERSION=$SCALA_VERSION
export SCALA_HOME=${SPARK_HOME}/scala
export SCALA_LIBRARY_PATH=${SCALA_HOME}/lib

# Use a fixed identifier for pid files
export SPARK_IDENT_STRING="mapr"

#########################################################################################################
# :::CAUTION::: DO NOT EDIT ANYTHING ON OR ABOVE THIS LINE
#########################################################################################################


#
# MASTER HA SETTINGS
#
#export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=<zookeeper1:5181,zookeeper2:5181,..> -Djava.security.auth.login.config=/opt/mapr/conf/mapr.login.conf -Dzookeeper.sasl.client=false"


# MEMORY SETTINGS
export SPARK_DAEMON_MEMORY=1g
export SPARK_WORKER_MEMORY=16g

# Worker Directory
export SPARK_WORKER_DIR=$SPARK_HOME/tmp

# Environment variable for printing the Spark command every time you run Spark. Set to "1" to print.
# export SPARK_PRINT_LAUNCH_COMMAND=1

# UI
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djava.library.path=$SPARK_MAPR_HOME/lib"