[SPARK-25809][K8S][TEST] New K8S integration testing backends #22805

wants to merge 7 commits into from
@@ -42,6 +42,9 @@ private[spark] object SparkKubernetesClientFactory {
sparkConf: SparkConf,
defaultServiceAccountToken: Option[File],
defaultServiceAccountCaCert: Option[File]): KubernetesClient = {

// TODO [SPARK-25887] Support configurable context

Member Author
Note to reviewers: in testing this I noticed an issue. While this PR adds the ability for the integration test machinery to use an arbitrary context when multiple contexts are defined in your KUBECONFIG file, the submission client will always use your current context. Therefore, if your current context does not match the requested context, the integration tests will fail or return false positives because the client and the test harness will use different contexts.

I have filed SPARK-25887 for addressing this since the client change seemed like it should be a separate issue and PR. I will provide a PR for that tomorrow.

val oauthTokenFileConf = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_FILE_CONF_SUFFIX"
val oauthTokenConf = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_CONF_SUFFIX"
val oauthTokenFile = sparkConf.getOption(oauthTokenFileConf)
@@ -63,6 +66,8 @@ private[spark] object SparkKubernetesClientFactory {
.getOption(s"$kubernetesAuthConfPrefix.$CLIENT_CERT_FILE_CONF_SUFFIX")
val dispatcher = new Dispatcher(
ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher"))

// TODO [SPARK-25887] Create builder in a way that respects configurable context
val config = new ConfigBuilder()
.withApiVersion("v1")
.withMasterUrl(master)
@@ -18,13 +18,10 @@ package org.apache.spark.deploy.k8s.submit

import java.util.concurrent.{CountDownLatch, TimeUnit}

import scala.collection.JavaConverters._

import io.fabric8.kubernetes.api.model.Pod
import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher}
import io.fabric8.kubernetes.client.Watcher.Action

import org.apache.spark.SparkException
import org.apache.spark.deploy.k8s.KubernetesUtils._
import org.apache.spark.internal.Logging
import org.apache.spark.util.ThreadUtils
183 changes: 170 additions & 13 deletions resource-managers/kubernetes/integration-tests/README.md
@@ -8,26 +8,59 @@ title: Spark on Kubernetes Integration Tests
Note that the integration test framework is currently being heavily revised and
is subject to change. Currently the integration tests only run with Java 8.

The simplest way to run the integration tests is to install and run Minikube, then run the following:
The simplest way to run the integration tests is to install and run Minikube, then run the following from this
directory:

dev/dev-run-integration-tests.sh

The minimum tested version of Minikube is 0.23.0. The kube-dns addon must be enabled. Minikube should
run with a minimum of 3 CPUs and 4G of memory:
run with a minimum of 4 CPUs and 6G of memory:

minikube start --cpus 3 --memory 4096
minikube start --cpus 4 --memory 6144

You can download Minikube [here](https://github.com/kubernetes/minikube/releases).

# Integration test customization

Configuration of the integration test runtime is done through passing different arguments to the test script. The main useful options are outlined below.
Configuration of the integration test runtime is done through passing different arguments to the test script.
The main useful options are outlined below.

## Using a different backend

The integration test backend, i.e. the K8S cluster used for testing, is controlled by the `--deploy-mode` option. By
default this is set to `minikube`; the available backends and their prerequisites are as follows.

### `minikube`

Uses the local `minikube` cluster. This requires that `minikube` 0.23.0 or greater be installed and that it be allocated
at least 4 CPUs and 6GB memory (some users have reported success with as few as 3 CPUs and 4GB memory). The tests will
check whether `minikube` is started and abort early if it isn't currently running.

### `docker-for-desktop`

Since July 2018 Docker for Desktop provides an optional Kubernetes cluster that can be enabled as described in this
[blog post](https://blog.docker.com/2018/07/kubernetes-is-now-available-in-docker-desktop-stable-channel/). Assuming
this is enabled, this backend will auto-configure itself from the `docker-for-desktop` context that Docker creates
in your `~/.kube/config` file. If your config file is in a different location you should set the `KUBECONFIG`
environment variable appropriately.
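As a quick sanity check before using this backend, you can verify that the expected context exists. This sketch assumes `kubectl` is installed; the context name and config path are the defaults described above:

```shell
# Verify the docker-for-desktop context is present in the kube config the
# tests will read; KUBECONFIG falls back to the default location.
export KUBECONFIG="${KUBECONFIG:-$HOME/.kube/config}"
CONTEXT_NAME="docker-for-desktop"

if command -v kubectl >/dev/null 2>&1; then
  kubectl config get-contexts -o name | grep -qx "$CONTEXT_NAME" \
    || echo "context $CONTEXT_NAME not found in $KUBECONFIG" >&2
fi
echo "using kube config at: $KUBECONFIG"
```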

### `cloud`

The `cloud` backend configures the tests to use an arbitrary Kubernetes cluster running in the cloud or elsewhere.

The `cloud` backend auto-configures the cluster to use from your K8S config file, which is assumed to be at
`~/.kube/config` unless the `KUBECONFIG` environment variable is set to override this location. By default this will use
whatever your current context is in the config file; to use an alternative context from your config file, specify the
`--context <context>` flag with the desired context.

You can optionally use a different K8S master URL than the one specified in your K8S config file; this should be
supplied via the `--spark-master <master-url>` flag.
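Putting the cloud-backend flags together, an invocation might look like the sketch below. The context name and master URL are placeholders for your own cluster, and the script is only executed if it is present in the current directory:

```shell
# Example cloud-backend invocation; my-cluster-context and the master URL
# are placeholders you would replace with your own cluster's values.
ARGS="--deploy-mode cloud --context my-cluster-context --spark-master https://k8s.example.com:6443"

if [ -x dev/dev-run-integration-tests.sh ]; then
  dev/dev-run-integration-tests.sh $ARGS
else
  echo "would run: dev/dev-run-integration-tests.sh $ARGS"
fi
```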

## Re-using Docker Images

By default, the test framework will build new Docker images on every test execution. A unique image tag is generated,
and it is written to file at `target/imageTag.txt`. To reuse the images built in a previous run, or to use a Docker image tag
that you have built by other means already, pass the tag to the test script:
and it is written to file at `target/imageTag.txt`. To reuse the images built in a previous run, or to use a Docker
image tag that you have built by other means already, pass the tag to the test script:

dev/dev-run-integration-tests.sh --image-tag <tag>
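A sketch of how a previous run's tag can be picked up from `target/imageTag.txt` and reused; the fallback tag name here is hypothetical:

```shell
# Reuse the image tag written by a previous test run, falling back to a
# tag you built by other means (my-prebuilt-tag is a placeholder).
TAG_FILE=target/imageTag.txt
if [ -f "$TAG_FILE" ]; then
  TAG="$(cat "$TAG_FILE")"
else
  TAG="my-prebuilt-tag"
fi
echo "would run: dev/dev-run-integration-tests.sh --image-tag $TAG"
```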

Expand All @@ -37,16 +70,140 @@ where if you still want to use images that were built before by the test framewo

## Spark Distribution Under Test

The Spark code to test is handed to the integration test system via a tarball. Here is the option that is used to specify the tarball:
The Spark code to test is handed to the integration test system via a tarball. Here is the option that is used to
specify the tarball:

* `--spark-tgz <path-to-tgz>` - set `<path-to-tgz>` to point to a tarball containing the Spark distribution to test.

TODO: Don't require the packaging of the built Spark artifacts into this tarball, just read them out of the current tree.
This tarball should be created by first running `dev/make-distribution.sh`, passing the `--tgz` flag and `-Pkubernetes`
as one of the options to ensure that Kubernetes support is included in the distribution. For more details on building a
runnable distribution please see the
[Building Spark](https://spark.apache.org/docs/latest/building-spark.html#building-a-runnable-distribution)
documentation.
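A sketch of building such a tarball from the Spark source root; the `--name` value is arbitrary and additional profiles can be added as needed:

```shell
# Build a runnable distribution with Kubernetes support included; the
# distribution name below is an arbitrary example.
DIST_NAME="k8s-integration"
if [ -x dev/make-distribution.sh ]; then
  dev/make-distribution.sh --name "$DIST_NAME" --tgz -Pkubernetes
else
  echo "run dev/make-distribution.sh --name $DIST_NAME --tgz -Pkubernetes from the Spark source root" >&2
fi
```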

**TODO:** Don't require the packaging of the built Spark artifacts into this tarball, just read them out of the current
tree.

## Customizing the Namespace and Service Account

* `--namespace <namespace>` - set `<namespace>` to the namespace in which the tests should be run.
* `--service-account <service account name>` - set `<service account name>` to the name of the Kubernetes service account to
use in the namespace specified by the `--namespace`. The service account is expected to have permissions to get, list, watch,
and create pods. For clusters with RBAC turned on, it's important that the right permissions are granted to the service account
in the namespace through an appropriate role and role binding. A reference RBAC configuration is provided in `dev/spark-rbac.yaml`.
If no namespace is specified then a temporary namespace will be created and deleted during the test run. Similarly if
no service account is specified then the `default` service account for the namespace will be used.

Using the `--namespace <namespace>` flag sets `<namespace>` to the namespace in which the tests should be run. If this
is supplied then the tests assume this namespace exists in the K8S cluster and will not attempt to create it.
Additionally this namespace must have an appropriately authorized service account which can be customised via the
`--service-account` flag.

The `--service-account <service account name>` flag sets `<service account name>` to the name of the Kubernetes service
account to use in the namespace specified by the `--namespace` flag. The service account is expected to have permissions
to get, list, watch, and create pods. For clusters with RBAC turned on, it's important that the right permissions are
granted to the service account in the namespace through an appropriate role and role binding. A reference RBAC
configuration is provided in `dev/spark-rbac.yaml`.
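For example, a namespace and service account could be prepared ahead of the run roughly as follows. The namespace and service account names are examples, and the service account is assumed to be defined by your RBAC configuration (the reference file path comes from this README):

```shell
# Pre-create a namespace and apply the reference RBAC configuration, then
# pass both to the test script; NS and SA are example names.
NS="spark-int-tests"
SA="spark-sa"
if command -v kubectl >/dev/null 2>&1; then
  kubectl create namespace "$NS" 2>/dev/null || true
  kubectl apply -n "$NS" -f dev/spark-rbac.yaml
fi
echo "dev/dev-run-integration-tests.sh --namespace $NS --service-account $SA"
```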

# Running the Test Directly

If you prefer to run just the integration tests directly, then you can customise the behaviour via passing system
properties to Maven. For example:

mvn integration-test -am -pl :spark-kubernetes-integration-tests_2.11 \
-Pkubernetes -Pkubernetes-integration-tests \
-Phadoop-2.7 -Dhadoop.version=2.7.3 \
-Dspark.kubernetes.test.sparkTgz=spark-3.0.0-SNAPSHOT-bin-example.tgz \
-Dspark.kubernetes.test.imageTag=sometag \
-Dspark.kubernetes.test.imageRepo=docker.io/somerepo \
-Dspark.kubernetes.test.namespace=spark-int-tests \
-Dspark.kubernetes.test.deployMode=docker-for-desktop \
-Dtest.include.tags=k8s


## Available Maven Properties

The following are the available Maven properties that can be passed. For the most part these correspond to flags passed
to the wrapper scripts and using the wrapper scripts will simply set these appropriately behind the scenes.

<table>
<tr>
<th>Property</th>
<th>Description</th>
<th>Default</th>
</tr>
<tr>
<td><code>spark.kubernetes.test.sparkTgz</code></td>
<td>
A runnable Spark distribution to test.
</td>
<td></td>
</tr>
<tr>
<td><code>spark.kubernetes.test.unpackSparkDir</code></td>
<td>
The directory where the runnable Spark distribution will be unpacked.
</td>
<td><code>${project.build.directory}/spark-dist-unpacked</code></td>
</tr>
<tr>
<td><code>spark.kubernetes.test.deployMode</code></td>
<td>
The integration test backend to use. Acceptable values are <code>minikube</code>,
<code>docker-for-desktop</code> and <code>cloud</code>.
</td>
<td><code>minikube</code></td>
</tr>
<tr>
<td><code>spark.kubernetes.test.kubeConfigContext</code></td>
<td>
When using the <code>cloud</code> backend, specifies the context from the user's K8S config file that should be used
as the target cluster for integration testing. If not set and using the <code>cloud</code> backend then your
current context will be used.
</td>
<td></td>
</tr>
<tr>
<td><code>spark.kubernetes.test.master</code></td>
<td>
When using the <code>cloud</code> backend, can be specified to indicate the K8S master URL to communicate
with.
</td>
<td></td>
</tr>
<tr>
<td><code>spark.kubernetes.test.imageTag</code></td>
<td>
A specific image tag to use, when set assumes images with those tags are already built and available in the
specified image repository. When set to <code>N/A</code> (the default) fresh images will be built.
</td>
<td><code>N/A</code></td>
</tr>
<tr>
<td><code>spark.kubernetes.test.imageTagFile</code></td>
<td>
A file containing the image tag to use, if no specific image tag is set then fresh images will be built with a
generated tag and that tag written to this file.
</td>
<td><code>${project.build.directory}/imageTag.txt</code></td>
</tr>
<tr>
<td><code>spark.kubernetes.test.imageRepo</code></td>
<td>
The Docker image repository that contains the images to be used if a specific image tag is set, or to which the
images will be pushed if fresh images are being built.
</td>
<td><code>docker.io/kubespark</code></td>
</tr>
<tr>
<td><code>spark.kubernetes.test.namespace</code></td>
<td>
A specific Kubernetes namespace to run the tests in. If specified then the tests assume that this namespace
already exists. When not specified a temporary namespace for the tests will be created and deleted as part of the
test run.
</td>
<td></td>
</tr>
<tr>
<td><code>spark.kubernetes.test.serviceAccountName</code></td>
<td>
A specific Kubernetes service account to use for running the tests. If not specified then the namespace's default
service account will be used, and that account must have sufficient permissions or the tests will fail.
</td>
<td></td>
</tr>
</table>
@@ -26,6 +26,7 @@ IMAGE_TAG="N/A"
SPARK_MASTER=
NAMESPACE=
SERVICE_ACCOUNT=
CONTEXT=
INCLUDE_TAGS="k8s"
EXCLUDE_TAGS=
SCALA_VERSION="$($TEST_ROOT_DIR/build/mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=scala.binary.version | grep -v '\[' )"
@@ -61,6 +62,10 @@ while (( "$#" )); do
SERVICE_ACCOUNT="$2"
shift
;;
--context)
CONTEXT="$2"
shift
;;
--include-tags)
INCLUDE_TAGS="k8s,$2"
shift
@@ -94,6 +99,11 @@ then
properties=( ${properties[@]} -Dspark.kubernetes.test.serviceAccountName=$SERVICE_ACCOUNT )
fi

if [ -n "$CONTEXT" ];
then
properties=( ${properties[@]} -Dspark.kubernetes.test.kubeConfigContext=$CONTEXT )
fi

if [ -n "$SPARK_MASTER" ];
then
properties=( ${properties[@]} -Dspark.kubernetes.test.master=$SPARK_MASTER )
10 changes: 10 additions & 0 deletions resource-managers/kubernetes/integration-tests/pom.xml
@@ -33,11 +33,20 @@
<scala-maven-plugin.version>3.2.2</scala-maven-plugin.version>
<scalatest-maven-plugin.version>1.0</scalatest-maven-plugin.version>
<sbt.project.name>kubernetes-integration-tests</sbt.project.name>

<!-- Integration Test Configuration Properties -->
<!-- Please see README.md in this directory for explanation of these -->
<spark.kubernetes.test.sparkTgz></spark.kubernetes.test.sparkTgz>
<spark.kubernetes.test.unpackSparkDir>${project.build.directory}/spark-dist-unpacked</spark.kubernetes.test.unpackSparkDir>
<spark.kubernetes.test.imageTag>N/A</spark.kubernetes.test.imageTag>
<spark.kubernetes.test.imageTagFile>${project.build.directory}/imageTag.txt</spark.kubernetes.test.imageTagFile>
<spark.kubernetes.test.deployMode>minikube</spark.kubernetes.test.deployMode>
<spark.kubernetes.test.imageRepo>docker.io/kubespark</spark.kubernetes.test.imageRepo>
<spark.kubernetes.test.kubeConfigContext></spark.kubernetes.test.kubeConfigContext>
<spark.kubernetes.test.master></spark.kubernetes.test.master>
<spark.kubernetes.test.namespace></spark.kubernetes.test.namespace>
<spark.kubernetes.test.serviceAccountName></spark.kubernetes.test.serviceAccountName>

<test.exclude.tags></test.exclude.tags>
<test.include.tags></test.include.tags>
</properties>
@@ -135,6 +144,7 @@
<spark.kubernetes.test.unpackSparkDir>${spark.kubernetes.test.unpackSparkDir}</spark.kubernetes.test.unpackSparkDir>
<spark.kubernetes.test.imageRepo>${spark.kubernetes.test.imageRepo}</spark.kubernetes.test.imageRepo>
<spark.kubernetes.test.deployMode>${spark.kubernetes.test.deployMode}</spark.kubernetes.test.deployMode>
<spark.kubernetes.test.kubeConfigContext>${spark.kubernetes.test.kubeConfigContext}</spark.kubernetes.test.kubeConfigContext>
<spark.kubernetes.test.master>${spark.kubernetes.test.master}</spark.kubernetes.test.master>
<spark.kubernetes.test.namespace>${spark.kubernetes.test.namespace}</spark.kubernetes.test.namespace>
<spark.kubernetes.test.serviceAccountName>${spark.kubernetes.test.serviceAccountName}</spark.kubernetes.test.serviceAccountName>
@@ -71,19 +71,36 @@ if [[ $IMAGE_TAG == "N/A" ]];
then
IMAGE_TAG=$(uuidgen);
cd $UNPACKED_SPARK_TGZ
if [[ $DEPLOY_MODE == cloud ]] ;
then
$UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build
if [[ $IMAGE_REPO == gcr.io* ]] ;
then
gcloud docker -- push $IMAGE_REPO/spark:$IMAGE_TAG
else
$UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG push
fi
else
# -m option for minikube.
$UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build
fi

case $DEPLOY_MODE in
cloud)
# Build images
$UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build

# Push images appropriately
if [[ $IMAGE_REPO == gcr.io* ]] ;
then
gcloud docker -- push $IMAGE_REPO/spark:$IMAGE_TAG
else
$UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG push
fi
;;

docker-for-desktop)
# Only need to build the image, since this places it in the local Docker repo,
# which is all Docker for Desktop needs, so there is no need to push
$UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build
;;

minikube)
# Only need to build; with the -m option the images are built directly
# against the minikube Docker daemon, so there is no need to push
$UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build
;;
*)
echo "Unrecognized deploy mode $DEPLOY_MODE" && exit 1
;;
esac
cd -
fi

@@ -33,6 +33,7 @@ import scala.collection.JavaConverters._

import org.apache.spark.SparkFunSuite
import org.apache.spark.deploy.k8s.integrationtest.TestConfig._
import org.apache.spark.deploy.k8s.integrationtest.TestConstants._
import org.apache.spark.deploy.k8s.integrationtest.backend.{IntegrationTestBackend, IntegrationTestBackendFactory}
import org.apache.spark.internal.Logging

@@ -77,7 +78,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite
System.clearProperty(key)
}

val sparkDirProp = System.getProperty("spark.kubernetes.test.unpackSparkDir")
val sparkDirProp = System.getProperty(CONFIG_KEY_UNPACK_DIR)
require(sparkDirProp != null, "Spark home directory must be provided in system properties.")
sparkHomeDir = Paths.get(sparkDirProp)
require(sparkHomeDir.toFile.isDirectory,
@@ -25,15 +25,16 @@ import scala.collection.mutable
import io.fabric8.kubernetes.client.DefaultKubernetesClient
import org.scalatest.concurrent.Eventually

import org.apache.spark.deploy.k8s.integrationtest.TestConstants._
import org.apache.spark.internal.Logging

private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesClient) {

val namespaceOption = Option(System.getProperty("spark.kubernetes.test.namespace"))
val namespaceOption = Option(System.getProperty(CONFIG_KEY_KUBE_NAMESPACE))
val hasUserSpecifiedNamespace = namespaceOption.isDefined
val namespace = namespaceOption.getOrElse(UUID.randomUUID().toString.replaceAll("-", ""))
val serviceAccountName =
Option(System.getProperty("spark.kubernetes.test.serviceAccountName"))
Option(System.getProperty(CONFIG_KEY_KUBE_SVC_ACCOUNT))
.getOrElse("default")
val kubernetesClient = defaultClient.inNamespace(namespace)
val clientConfig = kubernetesClient.getConfiguration