From fc491a23827b83ccc6a5c634752b33af78325713 Mon Sep 17 00:00:00 2001 From: Yinan Li Date: Mon, 10 Feb 2020 09:01:06 -0800 Subject: [PATCH] Upgraded to Spark 2.4.5 (#798) --- Dockerfile | 2 +- Dockerfile.rh | 2 +- README.md | 4 ++-- docs/developer-guide.md | 2 +- docs/quick-start-guide.md | 2 +- docs/user-guide.md | 16 ++++++++-------- docs/volcano-integration.md | 10 +++++----- examples/spark-pi-configmap.yaml | 10 +++++----- examples/spark-pi-prometheus.yaml | 10 +++++----- examples/spark-pi-schedule.yaml | 10 +++++----- examples/spark-pi.yaml | 10 +++++----- examples/spark-py-pi.yaml | 8 ++++---- manifest/spark-operator-with-metrics.yaml | 8 ++++---- manifest/spark-operator-with-webhook.yaml | 16 ++++++++-------- manifest/spark-operator.yaml | 8 ++++---- spark-docker/Dockerfile | 2 +- test/e2e/README.md | 2 +- 17 files changed, 61 insertions(+), 61 deletions(-) diff --git a/Dockerfile b/Dockerfile index 193cd36cc3..1441bb0960 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ # limitations under the License. # -ARG SPARK_IMAGE=gcr.io/spark-operator/spark:v2.4.5-SNAPSHOT +ARG SPARK_IMAGE=gcr.io/spark-operator/spark:v2.4.5 FROM golang:1.12.5-alpine as builder ARG DEP_VERSION="0.5.3" diff --git a/Dockerfile.rh b/Dockerfile.rh index 97def7fcb6..b87b7e812b 100644 --- a/Dockerfile.rh +++ b/Dockerfile.rh @@ -20,7 +20,7 @@ # 1. Your Docker version is >= 18.09.3 # 2. export DOCKER_BUILDKIT=1 -ARG SPARK_IMAGE=gcr.io/spark-operator/spark:v2.4.4 +ARG SPARK_IMAGE=gcr.io/spark-operator/spark:v2.4.5 FROM golang:1.12.5-alpine as builder ARG DEP_VERSION="0.5.3" diff --git a/README.md b/README.md index ef5b7448f7..508181d2c8 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,8 @@ The following table lists the most recent few versions of the operator. 
| Operator Version | API Version | Kubernetes Version | Base Spark Version | Operator Image Tag | | ------------- | ------------- | ------------- | ------------- | ------------- | -| `latest` (master HEAD) | `v1beta2` | 1.13+ | `2.4.5-SNAPSHOT` | `latest` | -| `v1beta2-1.0.2-2.4.5-SNAPSHOT` | `v1beta2` | 1.13+ | `2.4.5-SNAPSHOT` | `v1beta2-1.0.2-2.4.5-SNAPSHOT` | +| `latest` (master HEAD) | `v1beta2` | 1.13+ | `2.4.5` | `latest` | +| `v1beta2-1.1.0-2.4.5` | `v1beta2` | 1.13+ | `2.4.5` | `v1beta2-1.1.0-2.4.5` | | `v1beta2-1.0.1-2.4.4` | `v1beta2` | 1.13+ | `2.4.4` | `v1beta2-1.0.1-2.4.4` | | `v1beta2-1.0.0-2.4.4` | `v1beta2` | 1.13+ | `2.4.4` | `v1beta2-1.0.0-2.4.4` | | `v1beta1-0.9.0` | `v1beta1` | 1.13+ | `2.4.0` | `v2.4.0-v1beta1-0.9.0` | diff --git a/docs/developer-guide.md b/docs/developer-guide.md index a0fa58cc7b..9a00c06ca7 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -10,7 +10,7 @@ The easiest way to build the operator without worrying about its dependencies is $ docker build -t . ``` -The operator image is built upon a base Spark image that defaults to `gcr.io/spark-operator/spark:v2.4.4`. If you want to use your own Spark image (e.g., an image with a different version of Spark or some custom dependencies), specify the argument `SPARK_IMAGE` as the following example shows: +The operator image is built upon a base Spark image that defaults to `gcr.io/spark-operator/spark:v2.4.5`. If you want to use your own Spark image (e.g., an image with a different version of Spark or some custom dependencies), specify the argument `SPARK_IMAGE` as the following example shows: ```bash $ docker build --build-arg SPARK_IMAGE= -t . 
diff --git a/docs/quick-start-guide.md b/docs/quick-start-guide.md index 9093b0ef18..6a19522326 100644 --- a/docs/quick-start-guide.md +++ b/docs/quick-start-guide.md @@ -88,7 +88,7 @@ spec: labels: version: 2.3.0 memory: 512m - image: gcr.io/ynli-k8s/spark:v2.4.4 + image: gcr.io/ynli-k8s/spark:v2.4.5 mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.11-2.3.0.jar mainClass: org.apache.spark.examples.SparkPi mode: cluster diff --git a/docs/user-guide.md b/docs/user-guide.md index 5c61a7967f..1484e6ab15 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -67,9 +67,9 @@ metadata: spec: type: Scala mode: cluster - image: gcr.io/spark/spark:v2.4.4 + image: gcr.io/spark/spark:v2.4.5 mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.11-2.4.4.jar + mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.11-2.4.5.jar ``` ### Specifying Application Dependencies @@ -133,7 +133,7 @@ spec: coreLimit: 200m memory: 512m labels: - version: 2.4.4 + version: 2.4.5 serviceAccount: spark ``` @@ -153,7 +153,7 @@ spec: instances: 1 memory: 512m labels: - version: 2.4.4 + version: 2.4.5 ``` ### Specifying Extra Java Options @@ -234,7 +234,7 @@ spec: name: "amd.com/gpu" # GPU resource name quantity: 1 # number of GPUs to request labels: - version: 2.4.4 + version: 2.4.5 serviceAccount: spark executor: cores: 1 @@ -258,7 +258,7 @@ spec: memory: "512m" hostNetwork: true labels: - version: 2.4.4 + version: 2.4.5 serviceAccount: spark executor: cores: 1 @@ -590,7 +590,7 @@ Note that Python binding for PySpark is available in Apache Spark 2.4. The operator supports using the Spark metric system to expose metrics to a variety of sinks. Particularly, it is able to automatically configure the metric system to expose metrics to [Prometheus](https://prometheus.io/). 
Specifically, the field `.spec.monitoring` specifies how application monitoring is handled and particularly how metrics are to be reported. The metric system is configured through the configuration file `metrics.properties`, which gets its content from the field `.spec.monitoring.metricsProperties`. The content of [metrics.properties](../spark-docker/conf/metrics.properties) will be used by default if `.spec.monitoring.metricsProperties` is not specified. You can choose to enable or disable reporting driver and executor metrics using the fields `.spec.monitoring.exposeDriverMetrics` and `.spec.monitoring.exposeExecutorMetrics`, respectively. -Further, the field `.spec.monitoring.prometheus` specifies how metrics are exposed to Prometheus using the [Prometheus JMX exporter](https://github.com/prometheus/jmx_exporter). When `.spec.monitoring.prometheus` is specified, the operator automatically configures the JMX exporter to run as a Java agent. The only required field of `.spec.monitoring.prometheus` is `jmxExporterJar`, which specified the path to the Prometheus JMX exporter Java agent jar in the container. If you use the image `gcr.io/spark-operator/spark:v2.4.4-gcs-prometheus`, the jar is located at `/prometheus/jmx_prometheus_javaagent-0.11.0.jar`. The field `.spec.monitoring.prometheus.port` specifies the port the JMX exporter Java agent binds to and defaults to `8090` if not specified. The field `.spec.monitoring.prometheus.configuration` specifies the content of the configuration to be used with the JMX exporter. The content of [prometheus.yaml](../spark-docker/conf/prometheus.yaml) will be used by default if `.spec.monitoring.prometheus.configuration` is not specified. +Further, the field `.spec.monitoring.prometheus` specifies how metrics are exposed to Prometheus using the [Prometheus JMX exporter](https://github.com/prometheus/jmx_exporter). 
When `.spec.monitoring.prometheus` is specified, the operator automatically configures the JMX exporter to run as a Java agent. The only required field of `.spec.monitoring.prometheus` is `jmxExporterJar`, which specifies the path to the Prometheus JMX exporter Java agent jar in the container. If you use the image `gcr.io/spark-operator/spark:v2.4.5-gcs-prometheus`, the jar is located at `/prometheus/jmx_prometheus_javaagent-0.11.0.jar`. The field `.spec.monitoring.prometheus.port` specifies the port the JMX exporter Java agent binds to and defaults to `8090` if not specified. The field `.spec.monitoring.prometheus.configuration` specifies the content of the configuration to be used with the JMX exporter. The content of [prometheus.yaml](../spark-docker/conf/prometheus.yaml) will be used by default if `.spec.monitoring.prometheus.configuration` is not specified. Below is an example that shows how to configure the metric system to expose metrics to Prometheus using the Prometheus JMX exporter. Note that the JMX exporter Java agent jar is listed as a dependency and will be downloaded to where `.spec.dep.jarsDownloadDir` points to in Spark 2.3.x, which is `/var/spark-data/spark-jars` by default. Things are different in Spark 2.4 as dependencies will be downloaded to the local working directory instead in Spark 2.4. A complete example can be found in [examples/spark-pi-prometheus.yaml](../examples/spark-pi-prometheus.yaml). 
@@ -678,7 +678,7 @@ spec: template: type: Scala mode: cluster - image: gcr.io/spark/spark:v2.4.4 + image: gcr.io/spark/spark:v2.4.5 mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: local:///opt/spark/examples/jars/spark-examples_2.11-2.3.0.jar driver: diff --git a/docs/volcano-integration.md b/docs/volcano-integration.md index 7a7191fb9d..061f5f5114 100644 --- a/docs/volcano-integration.md +++ b/docs/volcano-integration.md @@ -31,11 +31,11 @@ metadata: spec: type: Scala mode: cluster - image: "gcr.io/spark-operator/spark:v2.4.4" + image: "gcr.io/spark-operator/spark:v2.4.5" imagePullPolicy: Always mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.4.jar" - sparkVersion: "2.4.4" + mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.5.jar" + sparkVersion: "2.4.5" batchScheduler: "volcano" #Note: the batch scheduler name must be specified with `volcano` restartPolicy: type: Never @@ -49,7 +49,7 @@ spec: coreLimit: "1200m" memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 serviceAccount: spark volumeMounts: - name: "test-volume" @@ -59,7 +59,7 @@ spec: instances: 1 memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 volumeMounts: - name: "test-volume" mountPath: "/tmp" diff --git a/examples/spark-pi-configmap.yaml b/examples/spark-pi-configmap.yaml index d1e8bffa8b..56016ed415 100644 --- a/examples/spark-pi-configmap.yaml +++ b/examples/spark-pi-configmap.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: "gcr.io/spark-operator/spark:v2.4.4" + image: "gcr.io/spark-operator/spark:v2.4.5" imagePullPolicy: Always mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.4.jar" - sparkVersion: "2.4.4" + mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.5.jar" + sparkVersion: "2.4.5" restartPolicy: type: Never volumes: @@ 
-37,7 +37,7 @@ spec: coreLimit: "1200m" memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 serviceAccount: spark volumeMounts: - name: config-vol @@ -47,7 +47,7 @@ spec: instances: 1 memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 volumeMounts: - name: config-vol mountPath: /opt/spark/mycm diff --git a/examples/spark-pi-prometheus.yaml b/examples/spark-pi-prometheus.yaml index af7b27f399..68d75d2561 100644 --- a/examples/spark-pi-prometheus.yaml +++ b/examples/spark-pi-prometheus.yaml @@ -22,13 +22,13 @@ metadata: spec: type: Scala mode: cluster - image: "gcr.io/spark-operator/spark:v2.4.4-gcs-prometheus" + image: "gcr.io/spark-operator/spark:v2.4.5-gcs-prometheus" imagePullPolicy: Always mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.4.jar" + mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.5.jar" arguments: - "100000" - sparkVersion: "2.4.4" + sparkVersion: "2.4.5" restartPolicy: type: Never driver: @@ -36,14 +36,14 @@ spec: coreLimit: "1200m" memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 serviceAccount: spark executor: cores: 1 instances: 1 memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 monitoring: exposeDriverMetrics: true exposeExecutorMetrics: true diff --git a/examples/spark-pi-schedule.yaml b/examples/spark-pi-schedule.yaml index b76378c087..a95d3e1d1d 100644 --- a/examples/spark-pi-schedule.yaml +++ b/examples/spark-pi-schedule.yaml @@ -25,11 +25,11 @@ spec: template: type: Scala mode: cluster - image: "gcr.io/spark-operator/spark:v2.4.4" + image: "gcr.io/spark-operator/spark:v2.4.5" imagePullPolicy: Always mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.4.jar" - sparkVersion: "2.4.4" + mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.5.jar" + sparkVersion: "2.4.5" restartPolicy: type: Never driver: @@ 
-37,11 +37,11 @@ spec: coreLimit: "1200m" memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 serviceAccount: spark executor: cores: 1 instances: 1 memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 diff --git a/examples/spark-pi.yaml b/examples/spark-pi.yaml index f62c90c0fd..5b62597fb9 100644 --- a/examples/spark-pi.yaml +++ b/examples/spark-pi.yaml @@ -21,11 +21,11 @@ metadata: spec: type: Scala mode: cluster - image: "gcr.io/spark-operator/spark:v2.4.4" + image: "gcr.io/spark-operator/spark:v2.4.5" imagePullPolicy: Always mainClass: org.apache.spark.examples.SparkPi - mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.4.jar" - sparkVersion: "2.4.4" + mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.5.jar" + sparkVersion: "2.4.5" restartPolicy: type: Never volumes: @@ -38,7 +38,7 @@ spec: coreLimit: "1200m" memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 serviceAccount: spark volumeMounts: - name: "test-volume" @@ -48,7 +48,7 @@ spec: instances: 1 memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 volumeMounts: - name: "test-volume" mountPath: "/tmp" diff --git a/examples/spark-py-pi.yaml b/examples/spark-py-pi.yaml index 7566a60084..5f644d493b 100644 --- a/examples/spark-py-pi.yaml +++ b/examples/spark-py-pi.yaml @@ -25,10 +25,10 @@ spec: type: Python pythonVersion: "2" mode: cluster - image: "gcr.io/spark-operator/spark-py:v2.4.4" + image: "gcr.io/spark-operator/spark-py:v2.4.5" imagePullPolicy: Always mainApplicationFile: local:///opt/spark/examples/src/main/python/pi.py - sparkVersion: "2.4.4" + sparkVersion: "2.4.5" restartPolicy: type: OnFailure onFailureRetries: 3 @@ -40,11 +40,11 @@ spec: coreLimit: "1200m" memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 serviceAccount: spark executor: cores: 1 instances: 1 memory: "512m" labels: - version: 2.4.4 + version: 2.4.5 diff --git a/manifest/spark-operator-with-metrics.yaml b/manifest/spark-operator-with-metrics.yaml 
index fe44ecb834..b668543bfe 100644 --- a/manifest/spark-operator-with-metrics.yaml +++ b/manifest/spark-operator-with-metrics.yaml @@ -21,13 +21,13 @@ metadata: namespace: spark-operator labels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 spec: replicas: 1 selector: matchLabels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 strategy: type: Recreate template: @@ -38,12 +38,12 @@ spec: prometheus.io/path: "/metrics" labels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 spec: serviceAccountName: sparkoperator containers: - name: sparkoperator - image: gcr.io/spark-operator/spark-operator:v2.4.4-v1beta2-latest + image: gcr.io/spark-operator/spark-operator:v2.4.5-v1beta2-latest imagePullPolicy: Always ports: - containerPort: 10254 diff --git a/manifest/spark-operator-with-webhook.yaml b/manifest/spark-operator-with-webhook.yaml index cba5e72115..b2f292fa10 100644 --- a/manifest/spark-operator-with-webhook.yaml +++ b/manifest/spark-operator-with-webhook.yaml @@ -21,20 +21,20 @@ metadata: namespace: spark-operator labels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 spec: replicas: 1 selector: matchLabels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 strategy: type: Recreate template: metadata: labels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 spec: serviceAccountName: sparkoperator volumes: @@ -43,7 +43,7 @@ spec: secretName: spark-webhook-certs containers: - name: sparkoperator - image: gcr.io/spark-operator/spark-operator:v2.4.4-v1beta2-latest + image: 
gcr.io/spark-operator/spark-operator:v2.4.5-v1beta2-latest imagePullPolicy: Always volumeMounts: - name: webhook-certs @@ -62,20 +62,20 @@ metadata: namespace: spark-operator labels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 spec: backoffLimit: 3 template: metadata: labels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 spec: serviceAccountName: sparkoperator restartPolicy: Never containers: - name: main - image: gcr.io/spark-operator/spark-operator:v2.4.4-v1beta2-latest + image: gcr.io/spark-operator/spark-operator:v2.4.5-v1beta2-latest imagePullPolicy: IfNotPresent command: ["/usr/bin/gencerts.sh", "-p"] --- @@ -91,4 +91,4 @@ spec: name: webhook selector: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 diff --git a/manifest/spark-operator.yaml b/manifest/spark-operator.yaml index a75a70cd2c..589b1b591b 100644 --- a/manifest/spark-operator.yaml +++ b/manifest/spark-operator.yaml @@ -21,25 +21,25 @@ metadata: namespace: spark-operator labels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 spec: replicas: 1 selector: matchLabels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 strategy: type: Recreate template: metadata: labels: app.kubernetes.io/name: sparkoperator - app.kubernetes.io/version: v2.4.4-v1beta2 + app.kubernetes.io/version: v2.4.5-v1beta2 spec: serviceAccountName: sparkoperator containers: - name: sparkoperator - image: gcr.io/spark-operator/spark-operator:v2.4.4-v1beta2-latest + image: gcr.io/spark-operator/spark-operator:v2.4.5-v1beta2-latest imagePullPolicy: Always args: - -logtostderr diff --git a/spark-docker/Dockerfile b/spark-docker/Dockerfile index 
95ca4514ef..dffa4104f8 100644 --- a/spark-docker/Dockerfile +++ b/spark-docker/Dockerfile @@ -14,7 +14,7 @@ # limitations under the License. # -ARG SPARK_IMAGE=gcr.io/spark-operator/spark:v2.4.4 +ARG SPARK_IMAGE=gcr.io/spark-operator/spark:v2.4.5 FROM ${SPARK_IMAGE} # Setup dependencies for Google Cloud Storage access. diff --git a/test/e2e/README.md b/test/e2e/README.md index f979f8cdb3..4eaed1e3ec 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -12,7 +12,7 @@ Prerequisites: e2e tests are written as Go test. All go test techniques apply (e.g. picking what to run, timeout length). Let's say I want to run all tests in "test/e2e/": ```bash -$ go test -v ./test/e2e/ --kubeconfig "$HOME/.kube/config" --operator-image=gcr.io/spark-operator/spark-operator:v2.4.4-v1beta2-latest +$ go test -v ./test/e2e/ --kubeconfig "$HOME/.kube/config" --operator-image=gcr.io/spark-operator/spark-operator:v2.4.5-v1beta2-latest ``` ### Available Tests