This repository was archived by the owner on Jan 9, 2020. It is now read-only.

[SPARK-18278] Minimal support for submitting to Kubernetes. #1

Closed · wants to merge 80 commits
Commits (80)
d36d223
[SPARK-18278] Minimal support for submitting to Kubernetes.
mccheah Dec 6, 2016
d379a3a
Fix style
mccheah Dec 6, 2016
8ce7f78
Make naming more consistent
mccheah Dec 7, 2016
c9d0039
Fix building assembly with Kubernetes.
mccheah Dec 9, 2016
08f4421
Service account support, use constants from fabric8 library.
mccheah Dec 10, 2016
2683894
Some small changes
mccheah Jan 7, 2017
6ffa5b2
Use k8s:// formatted URL instead of separate setting.
mccheah Jan 9, 2017
8d217c5
Merge remote-tracking branch 'apache/master' into k8s-support-alterna…
mccheah Jan 9, 2017
7de958b
Reindent comment to conform to JavaDoc style
foxish Jan 9, 2017
0d89026
Move kubernetes under resource-managers folder.
mccheah Jan 9, 2017
4b158bb
Merge remote-tracking branch 'apache-spark-on-k8s/k8s-support-alterna…
mccheah Jan 9, 2017
697a35a
Use tar and gzip to compress+archive shipped jars (#2)
mccheah Jan 11, 2017
40cf4ea
Use alpine and java 8 for docker images. (#10)
mccheah Jan 12, 2017
ad9adde
Copy the Dockerfiles from docker-minimal-bundle into the distribution…
mccheah Jan 12, 2017
c155909
inherit IO (#13)
foxish Jan 12, 2017
015d2c8
Error messages when the driver container fails to start. (#11)
mccheah Jan 13, 2017
6a939e2
Fix linter error to make CI happy (#18)
foxish Jan 13, 2017
5c6650d
Documentation for the current state of the world (#16)
mccheah Jan 13, 2017
6d8c763
Development workflow documentation for the current state of the world…
mccheah Jan 13, 2017
98b5edc
Added service name as prefix to executor pods (#14)
foxish Jan 13, 2017
4c24d9b
Add kubernetes profile to travis CI yml file (#21)
kimoonkim Jan 14, 2017
2a093a8
Improved the example commands in running-on-k8s document. (#25)
lins05 Jan 17, 2017
486bdbe
Fix spacing for command highlighting (#31)
foxish Jan 18, 2017
fd84fca
Support custom labels on the driver pod. (#27)
mccheah Jan 19, 2017
93f4cdc
Make pod name unique using the submission timestamp (#32)
foxish Jan 19, 2017
46cda32
A number of small tweaks to the MVP. (#23)
mccheah Jan 24, 2017
45b6ec3
Correct hadoop profile: hadoop2.7 -> hadoop-2.7 (#41)
ash211 Jan 25, 2017
c480574
Support setting the driver pod launching timeout. (#36)
lins05 Jan 25, 2017
7237d39
Sanitize kubernetesAppId for use in secret, service, and pod names (#45)
ash211 Jan 25, 2017
2f0eb4b
Support spark.driver.extraJavaOptions (#48)
kimoonkim Jan 26, 2017
4df2fae
Use "extraScalaTestArgs" to pass extra options to scalatest. (#52)
lins05 Jan 26, 2017
37b6014
Use OpenJDK8's official Alpine image. (#51)
mccheah Jan 26, 2017
bc0be32
Remove unused driver extra classpath upload code (#54)
mccheah Jan 26, 2017
422dceb
Fix k8s integration tests (#44)
lins05 Jan 27, 2017
c3428f7
Added GC to components (#56)
foxish Jan 27, 2017
ccb2e2f
Create README to better describe project purpose (#50)
ash211 Jan 28, 2017
4a2a002
Access the Driver Launcher Server over NodePort for app launch + subm…
mccheah Jan 30, 2017
a4abee3
Extract constants and config into separate file. Launch => Submit. (#65)
mccheah Jan 31, 2017
0c3ff11
Retry the submit-application request to multiple nodes (#69)
mccheah Feb 2, 2017
42819f7
Allow adding arbitrary files (#71)
mccheah Feb 2, 2017
43c918c
Fix NPE around unschedulable pod specs (#79)
ash211 Feb 2, 2017
5ffbac2
Introduce blocking submit to kubernetes by default (#53)
ash211 Feb 3, 2017
a175f7a
Do not wait for pod finishing in integration tests. (#84)
lins05 Feb 3, 2017
69c8270
Check for user jars/files existence before creating the driver pod. (…
lins05 Feb 8, 2017
b1466e6
Use readiness probe instead of client-side ping. (#75)
mccheah Feb 9, 2017
868f830
Note integration tests require Java 8 (#99)
ash211 Feb 10, 2017
42b72c5
Bumping up kubernetes-client version to fix GKE and local proxy (#105)
foxish Feb 10, 2017
6e26102
Truncate k8s hostnames to be no longer than 63 characters (#102)
ash211 Feb 11, 2017
d297df9
Fixed loading the executors page through the kubectl proxy. (#95)
lins05 Feb 13, 2017
f374892
Filter nodes to only try and send files to external IPs (#106)
foxish Feb 13, 2017
d47c4d7
Parse results of minikube status more rigorously (#97)
ash211 Feb 13, 2017
8b31beb
Adding legacyHostIP to the list of IPs we look at (#114)
foxish Feb 14, 2017
806b3b1
Add -DskipTests to dev docs (#115)
ash211 Feb 15, 2017
84f147b
Shutdown the thread scheduler in LoggingPodStatusWatcher on receiving…
varunkatta Feb 16, 2017
9d250a2
Trigger scalatest plugin in the integration-test phase (#93)
kimoonkim Feb 16, 2017
4df5821
Fix issue with DNS resolution (#118)
foxish Feb 16, 2017
51df502
Change the API contract for uploading local files (#107)
mccheah Feb 16, 2017
25a209b
Optionally expose the driver UI port as NodePort (#131)
kimoonkim Feb 22, 2017
a4bc800
Set the REST service's exit code to the exit code of its driver subpr…
ash211 Feb 23, 2017
913a60e
Pass the actual iterable from the option to get files (#139)
mccheah Feb 23, 2017
452f8f1
Use a separate class to track components that need to be cleaned up (…
mccheah Feb 23, 2017
0a22b35
Enable unit tests in Travis CI build (#132)
kimoonkim Feb 23, 2017
622bfdb
Change driver pod's restart policy from OnFailure to Never (#145)
ash211 Feb 23, 2017
1f8fca0
Extract SSL configuration handling to a separate class (#123)
mccheah Feb 24, 2017
fb9a26e
Exclude known flaky tests (#156)
kimoonkim Feb 24, 2017
d81c084
Richer logging and better error handling in driver pod watch (#154)
foxish Feb 24, 2017
82275ae
Document blocking submit calls (#152)
ash211 Feb 25, 2017
21fb9b7
Allow custom annotations on the driver pod. (#163)
mccheah Mar 2, 2017
4f12335
Update client version & minikube version (#142)
foxish Mar 2, 2017
f0a40b8
Allow customizing external URI provision + External URI can be set vi…
mccheah Mar 3, 2017
8336465
Remove okhttp from top-level pom (#166)
foxish Mar 3, 2017
f6823f3
Allow setting memory on the driver submission server. (#161)
mccheah Mar 3, 2017
6556451
Add a section for prerequisites (#171)
foxish Mar 4, 2017
4e72392
Add instructions to find master URL (#169)
foxish Mar 4, 2017
f27885a
Propagate exceptions (#172)
mccheah Mar 6, 2017
c118f47
Logging for resource deletion (#170)
ash211 Mar 6, 2017
e5da90d
Docs improvements (#176)
foxish Mar 8, 2017
2a61438
Add Apache license to a few files (#175)
ash211 Mar 8, 2017
be109ab
Adding clarification pre-alpha (#181)
foxish Mar 8, 2017
1aba361
Allow providing an OAuth token for authenticating against k8s (#180)
mccheah Mar 13, 2017
22 changes: 17 additions & 5 deletions .travis.yml
@@ -25,11 +25,22 @@
sudo: required
dist: trusty

# 2. Choose language and target JDKs for parallel builds.
# 2. Choose language, target JDK and env's for parallel builds.
language: java
jdk:
- oraclejdk7
- oraclejdk8
env: # Used by the install section below.
# Configure the unit test build for spark core and kubernetes modules,
# while excluding some flaky unit tests using a regex pattern.
- PHASE=test \
PROFILES="-Pmesos -Pyarn -Phadoop-2.7 -Pkubernetes" \
MODULES="-pl core,resource-managers/kubernetes/core -am" \
ARGS="-Dtest=none -Dsuffixes='^org\.apache\.spark\.(?!SortShuffleSuite$|rdd\.LocalCheckpointSuite$|deploy\.SparkSubmitSuite$|deploy\.StandaloneDynamicAllocationSuite$).*'"
# Configure the full build.
- PHASE=install \
PROFILES="-Pmesos -Pyarn -Phadoop-2.7 -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver" \
MODULES="" \
ARGS="-T 4 -q -DskipTests"

# 3. Setup cache directory for SBT and Maven.
cache:
@@ -41,11 +52,12 @@ cache:
notifications:
email: false

# 5. Run maven install before running lint-java.
# 5. Run maven build before running lints.
install:
- export MAVEN_SKIP_RC=1
- build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install
- build/mvn ${PHASE} ${PROFILES} ${MODULES} ${ARGS}

# 6. Run lint-java.
# 6. Run lints.
script:
- dev/lint-java
- dev/lint-scala
38 changes: 38 additions & 0 deletions README.md
@@ -1,3 +1,41 @@
# Apache Spark On Kubernetes

This repository, located at https://github.com/apache-spark-on-k8s/spark, contains a fork of Apache Spark that enables running Spark jobs natively on a Kubernetes cluster.

## What is this?

This is a collaboratively maintained project working on [SPARK-18278](https://issues.apache.org/jira/browse/SPARK-18278). The goal is to bring native support for Spark to use Kubernetes as a cluster manager, in a fully supported way on par with the Spark Standalone, Mesos, and Apache YARN cluster managers.

## Getting Started

- [Usage guide](docs/running-on-kubernetes.md) shows how to run the code
- [Development docs](resource-managers/kubernetes/README.md) shows how to get set up for development
- Code is primarily located in the [resource-managers/kubernetes](resource-managers/kubernetes) folder

## Why does this fork exist?

Adding native integration for a new cluster manager is a large undertaking. If poorly executed, it could introduce bugs into Spark when run on other cluster managers, cause release blockers slowing down the overall Spark project, or require hotfixes which divert attention away from development towards managing additional releases. Any work this deep inside Spark needs to be done carefully to minimize the risk of those negative externalities.

At the same time, an increasing number of people from various companies and organizations desire to work together to natively run Spark on Kubernetes. The group needs a code repository, communication forum, issue tracking, and continuous integration, all in order to work together effectively on an open source product.

We've been asked by an Apache Spark Committer to work outside of the Apache infrastructure for a short period of time to allow this feature to be hardened and improved without creating risk for Apache Spark. The aim is to rapidly bring it to the point where it can be brought into the mainline Apache Spark repository for continued development within the Apache umbrella. If all goes well, this should be a short-lived fork rather than a long-lived one.

## Who are we?

This is a collaborative effort by several folks from different companies who are interested in seeing this feature be successful. Companies active in this project include (alphabetically):

- Google
- Haiwen
- Hyperpilot
- Intel
- Palantir
- Pepperdata
- Red Hat

--------------------

(original README below)

# Apache Spark

Spark is a fast and general cluster computing system for Big Data. It provides
10 changes: 10 additions & 0 deletions assembly/pom.xml
@@ -148,6 +148,16 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>kubernetes</id>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-kubernetes_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>
<profile>
<id>hive</id>
<dependencies>
core/src/main/resources/org/apache/spark/ui/static/executorspage.js
@@ -54,7 +54,28 @@ $(document).ajaxStart(function () {
$.blockUI({message: '<h3>Loading Executors Page...</h3>'});
});

function findKubernetesServiceBaseURI() {
var k8sProxyPattern = '/api/v1/proxy/namespaces/';
var k8sProxyPatternPos = document.baseURI.indexOf(k8sProxyPattern);
if (k8sProxyPatternPos > 0) {
// Spark is running in a kubernetes cluster, and the web ui is served
// through the kubectl proxy.
var remaining = document.baseURI.substr(k8sProxyPatternPos + k8sProxyPattern.length);
var urlSlashesCount = remaining.split('/').length - 3;
var words = document.baseURI.split('/');
var baseURI = words.slice(0, words.length - urlSlashesCount).join('/');
return baseURI;
}

return null;
}

function createTemplateURI(appId) {
var kubernetesBaseURI = findKubernetesServiceBaseURI();
if (kubernetesBaseURI) {
return kubernetesBaseURI + '/static/executorspage-template.html';
}

var words = document.baseURI.split('/');
var ind = words.indexOf("proxy");
if (ind > 0) {
@@ -70,6 +91,14 @@ function createTemplateURI(appId) {
}

function getStandAloneppId(cb) {
var kubernetesBaseURI = findKubernetesServiceBaseURI();
if (kubernetesBaseURI) {
var appIdAndPort = kubernetesBaseURI.split('/').slice(-1)[0];
var appId = appIdAndPort.split(':')[0];
cb(appId);
return;
}

var words = document.baseURI.split('/');
var ind = words.indexOf("proxy");
if (ind > 0) {
@@ -95,6 +124,11 @@ function getStandAloneppId(cb) {
}

function createRESTEndPoint(appId) {
var kubernetesBaseURI = findKubernetesServiceBaseURI();
if (kubernetesBaseURI) {
return kubernetesBaseURI + "/api/v1/applications/" + appId + "/allexecutors";
}

var words = document.baseURI.split('/');
var ind = words.indexOf("proxy");
if (ind > 0) {
31 changes: 25 additions & 6 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -69,7 +69,8 @@ object SparkSubmit extends CommandLineUtils {
private val STANDALONE = 2
private val MESOS = 4
private val LOCAL = 8
private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL
private val KUBERNETES = 16
private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | KUBERNETES | LOCAL

// Deploy modes
private val CLIENT = 1
@@ -229,9 +230,10 @@ object SparkSubmit extends CommandLineUtils {
YARN
case m if m.startsWith("spark") => STANDALONE
case m if m.startsWith("mesos") => MESOS
case m if m.startsWith("k8s") => KUBERNETES
case m if m.startsWith("local") => LOCAL
case _ =>
printErrorAndExit("Master must either be yarn or start with spark, mesos, local")
printErrorAndExit("Master must either be yarn or start with spark, mesos, k8s, or local")
-1
}

@@ -274,6 +276,7 @@ object SparkSubmit extends CommandLineUtils {
}
val isYarnCluster = clusterManager == YARN && deployMode == CLUSTER
val isMesosCluster = clusterManager == MESOS && deployMode == CLUSTER
val isKubernetesCluster = clusterManager == KUBERNETES && deployMode == CLUSTER

// Resolve maven dependencies if there are any and add classpath to jars. Add them to py-files
// too for packages that include Python code
@@ -320,6 +323,10 @@ object SparkSubmit extends CommandLineUtils {

// The following modes are not supported or applicable
(clusterManager, deployMode) match {
case (KUBERNETES, CLIENT) =>
printErrorAndExit("Client mode is currently not supported for Kubernetes.")
case (KUBERNETES, CLUSTER) if args.isPython || args.isR =>
printErrorAndExit("Kubernetes does not currently support python or R applications.")
case (STANDALONE, CLUSTER) if args.isPython =>
printErrorAndExit("Cluster deploy mode is currently not supported for python " +
"applications on standalone clusters.")
@@ -453,17 +460,21 @@ object SparkSubmit extends CommandLineUtils {
OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.principal"),
OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.keytab"),

// Other options
OptionAssigner(args.kubernetesNamespace, KUBERNETES, ALL_DEPLOY_MODES,
sysProp = "spark.kubernetes.namespace"),

// Other options
OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES,
sysProp = "spark.executor.cores"),
OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES,
sysProp = "spark.executor.memory"),
OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES,
sysProp = "spark.cores.max"),
OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES,
OptionAssigner(args.files, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES,
sysProp = "spark.files"),
OptionAssigner(args.jars, LOCAL, CLIENT, sysProp = "spark.jars"),
OptionAssigner(args.jars, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.jars"),
OptionAssigner(args.jars, STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES,
sysProp = "spark.jars"),
OptionAssigner(args.driverMemory, STANDALONE | MESOS | YARN, CLUSTER,
sysProp = "spark.driver.memory"),
OptionAssigner(args.driverCores, STANDALONE | MESOS | YARN, CLUSTER,
@@ -496,8 +507,9 @@ object SparkSubmit extends CommandLineUtils {

// Add the application jar automatically so the user doesn't have to call sc.addJar
// For YARN cluster mode, the jar is already distributed on each node as "app.jar"
// In Kubernetes cluster mode, the jar will be uploaded by the client separately.
// For python and R files, the primary resource is already distributed as a regular file
if (!isYarnCluster && !args.isPython && !args.isR) {
if (!isYarnCluster && !isKubernetesCluster && !args.isPython && !args.isR) {
var jars = sysProps.get("spark.jars").map(x => x.split(",").toSeq).getOrElse(Seq.empty)
if (isUserJar(args.primaryResource)) {
jars = jars ++ Seq(args.primaryResource)
@@ -596,6 +608,13 @@ object SparkSubmit extends CommandLineUtils {
}
}

if (isKubernetesCluster) {
childMainClass = "org.apache.spark.deploy.kubernetes.Client"
childArgs += args.primaryResource
childArgs += args.mainClass
childArgs ++= args.childArgs
}

// Load any properties specified through --conf and the default properties file
for ((k, v) <- args.sparkProperties) {
sysProps.getOrElseUpdate(k, v)
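
Taken together, the SparkSubmit changes add a fifth cluster-manager flag and route `k8s://`-prefixed master URLs to a Kubernetes-specific client when the deploy mode is cluster. The sketch below is a minimal, self-contained restatement of that routing, not the real submission path; the master URL and the simplified `main` are illustrative only.

```scala
// Minimal sketch of the routing added in this PR. The constants and the match
// shape mirror SparkSubmit.scala above; everything else is simplified.
object KubernetesSubmitSketch {
  // Cluster managers are bit flags so one OptionAssigner rule can cover several
  // of them via bitwise OR (e.g. STANDALONE | MESOS | KUBERNETES).
  val YARN = 1
  val STANDALONE = 2
  val MESOS = 4
  val LOCAL = 8
  val KUBERNETES = 16
  val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | KUBERNETES | LOCAL

  def clusterManager(master: String): Int = master match {
    case m if m.startsWith("yarn") => YARN // simplified; the real code also special-cases "yarn"
    case m if m.startsWith("spark") => STANDALONE
    case m if m.startsWith("mesos") => MESOS
    case m if m.startsWith("k8s") => KUBERNETES
    case m if m.startsWith("local") => LOCAL
    case _ => sys.error("Master must either be yarn or start with spark, mesos, k8s, or local")
  }

  def main(args: Array[String]): Unit = {
    // Hypothetical k8s:// master URL; host and port are illustrative only.
    val master = "k8s://https://192.168.99.100:8443"
    val deployModeIsCluster = true // client mode is rejected for Kubernetes in this PR
    if (clusterManager(master) == KUBERNETES && deployModeIsCluster) {
      // Cluster mode hands control to the Kubernetes client, passing the primary
      // resource, the main class, and any remaining application arguments.
      val childMainClass = "org.apache.spark.deploy.kubernetes.Client"
      println(s"Would launch $childMainClass with the application jar and main class")
    }
  }
}
```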
core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -71,6 +71,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
var principal: String = null
var keytab: String = null

// Kubernetes only
var kubernetesNamespace: String = null

// Standalone cluster mode only
var supervise: Boolean = false
var driverCores: String = null
@@ -186,6 +189,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
.getOrElse(sparkProperties.get("spark.executor.instances").orNull)
keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull
principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull
kubernetesNamespace = Option(kubernetesNamespace)
.orElse(sparkProperties.get("spark.kubernetes.namespace"))
.orNull

// Try to set main class from JAR if no --class argument is given
if (mainClass == null && !isPython && !isR && primaryResource != null) {
@@ -424,6 +430,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
case KEYTAB =>
keytab = value

case KUBERNETES_NAMESPACE =>
kubernetesNamespace = value

case HELP =>
printUsageAndExit(0)

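
As with the other submit arguments, the new namespace option resolves with a simple precedence: an explicit command-line value wins, then the `spark.kubernetes.namespace` entry from the Spark properties, and finally null if neither is present. A small sketch of that resolution order; the namespace value and the flag spelling are assumed examples, not taken from the PR.

```scala
// Sketch of the precedence used for kubernetesNamespace above: CLI value first,
// then the spark.kubernetes.namespace property, then null.
object NamespaceResolutionSketch {
  def resolveNamespace(cliValue: String, sparkProperties: Map[String, String]): String =
    Option(cliValue)
      .orElse(sparkProperties.get("spark.kubernetes.namespace"))
      .orNull

  def main(args: Array[String]): Unit = {
    // No namespace flag given on the command line, but the property is set;
    // "spark-jobs" is an assumed example value.
    val ns = resolveNamespace(null, Map("spark.kubernetes.namespace" -> "spark-jobs"))
    println(ns) // prints "spark-jobs"
  }
}
```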
core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala
@@ -19,16 +19,16 @@ package org.apache.spark.deploy.rest

import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse}

import scala.io.Source

import com.fasterxml.jackson.core.JsonProcessingException
import org.eclipse.jetty.server.{HttpConnectionFactory, Server, ServerConnector}
import org.eclipse.jetty.http.HttpVersion
import org.eclipse.jetty.server.{HttpConfiguration, HttpConnectionFactory, Server, ServerConnector, SslConnectionFactory}
import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder}
import org.eclipse.jetty.util.thread.{QueuedThreadPool, ScheduledExecutorScheduler}
import org.json4s._
import org.json4s.jackson.JsonMethods._
import scala.io.Source

import org.apache.spark.{SPARK_VERSION => sparkVersion, SparkConf}
import org.apache.spark.{SPARK_VERSION => sparkVersion, SparkConf, SSLOptions}
import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils

@@ -50,7 +50,8 @@
private[spark] abstract class RestSubmissionServer(
val host: String,
val requestedPort: Int,
val masterConf: SparkConf) extends Logging {
val masterConf: SparkConf,
val sslOptions: SSLOptions = SSLOptions()) extends Logging {
protected val submitRequestServlet: SubmitRequestServlet
protected val killRequestServlet: KillRequestServlet
protected val statusRequestServlet: StatusRequestServlet
@@ -79,19 +80,32 @@ private[spark] abstract class RestSubmissionServer(
* Return a 2-tuple of the started server and the bound port.
*/
private def doStart(startPort: Int): (Server, Int) = {
// TODO consider using JettyUtils#startServer to do this instead
val threadPool = new QueuedThreadPool
threadPool.setDaemon(true)
val server = new Server(threadPool)

val resolvedConnectionFactories = sslOptions
.createJettySslContextFactory()
.map(sslFactory => {
val sslConnectionFactory = new SslConnectionFactory(
sslFactory, HttpVersion.HTTP_1_1.asString())
val rawHttpConfiguration = new HttpConfiguration()
rawHttpConfiguration.setSecureScheme("https")
rawHttpConfiguration.setSecurePort(startPort)
val rawHttpConnectionFactory = new HttpConnectionFactory(rawHttpConfiguration)
Array(sslConnectionFactory, rawHttpConnectionFactory)
}).getOrElse(Array(new HttpConnectionFactory()))

val connector = new ServerConnector(
server,
null,
// Call this full constructor to set this, which forces daemon threads:
new ScheduledExecutorScheduler("RestSubmissionServer-JettyScheduler", true),
null,
-1,
-1,
new HttpConnectionFactory())
server,
null,
// Call this full constructor to set this, which forces daemon threads:
new ScheduledExecutorScheduler("RestSubmissionServer-JettyScheduler", true),
null,
-1,
-1,
resolvedConnectionFactories: _*)
connector.setHost(host)
connector.setPort(startPort)
server.addConnector(connector)
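
The new `sslOptions` parameter controls which Jetty connection factories back the submission server's connector: when an SSL context factory can be derived from the options, the connector terminates TLS and serves HTTP/1.1 over it on the secure port; otherwise it falls back to a plain HTTP connection factory. The standalone sketch below condenses that selection logic, assuming the caller already holds an optional `SslContextFactory`; it is illustrative rather than the server's actual code path.

```scala
import org.eclipse.jetty.http.HttpVersion
import org.eclipse.jetty.server.{ConnectionFactory, HttpConfiguration, HttpConnectionFactory, SslConnectionFactory}
import org.eclipse.jetty.util.ssl.SslContextFactory

object ConnectionFactorySketch {
  // Condensed version of the factory selection in doStart() above.
  // `maybeSsl` stands in for sslOptions.createJettySslContextFactory().
  def connectionFactories(maybeSsl: Option[SslContextFactory], securePort: Int): Array[ConnectionFactory] =
    maybeSsl.map { sslFactory =>
      // TLS terminates first; HTTP/1.1 then runs over it on the secure port.
      val ssl = new SslConnectionFactory(sslFactory, HttpVersion.HTTP_1_1.asString())
      val httpConf = new HttpConfiguration()
      httpConf.setSecureScheme("https")
      httpConf.setSecurePort(securePort)
      Array[ConnectionFactory](ssl, new HttpConnectionFactory(httpConf))
    }.getOrElse(Array[ConnectionFactory](new HttpConnectionFactory()))
}
```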
1 change: 1 addition & 0 deletions dev/.rat-excludes
@@ -103,3 +103,4 @@ org.apache.spark.scheduler.ExternalClusterManager
org.apache.spark.deploy.yarn.security.ServiceCredentialProvider
spark-warehouse
structured-streaming/*
org.apache.spark.deploy.rest.kubernetes.DriverServiceManager
7 changes: 7 additions & 0 deletions dev/make-distribution.sh
@@ -182,6 +182,13 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE"
# Copy jars
cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"

# Copy docker files
mkdir -p "$DISTDIR/dockerfiles/driver"
mkdir -p "$DISTDIR/dockerfiles/executor"
DOCKERFILES_SRC="$SPARK_HOME/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker"
cp "$DOCKERFILES_SRC/driver/Dockerfile" "$DISTDIR/dockerfiles/driver/Dockerfile"
cp "$DOCKERFILES_SRC/executor/Dockerfile" "$DISTDIR/dockerfiles/executor/Dockerfile"

# Only create the yarn directory if the yarn artifacts were build.
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then
mkdir "$DISTDIR"/yarn
2 changes: 2 additions & 0 deletions dev/scalastyle
@@ -26,6 +26,8 @@ ERRORS=$(echo -e "q\n" \
-Pyarn \
-Phive \
-Phive-thriftserver \
-Pkubernetes \
-Pkubernetes-integration-tests \
scalastyle test:scalastyle \
| awk '{if($1~/error/)print}' \
)
1 change: 1 addition & 0 deletions docs/_layouts/global.html
@@ -99,6 +99,7 @@
<li><a href="spark-standalone.html">Spark Standalone</a></li>
<li><a href="running-on-mesos.html">Mesos</a></li>
<li><a href="running-on-yarn.html">YARN</a></li>
<li><a href="running-on-kubernetes.html">Kubernetes</a></li>
</ul>
</li>

1 change: 1 addition & 0 deletions docs/index.md
@@ -113,6 +113,7 @@ options for deployment:
* [Mesos](running-on-mesos.html): deploy a private cluster using
[Apache Mesos](http://mesos.apache.org)
* [YARN](running-on-yarn.html): deploy Spark on top of Hadoop NextGen (YARN)
* [Kubernetes](running-on-kubernetes.html): deploy Spark on top of Kubernetes

**Other Documents:**
