From cf3446097f47047ab46a076ebdb1f71ee9428f84 Mon Sep 17 00:00:00 2001 From: Matthew Delaney Date: Fri, 9 Oct 2020 22:11:05 -0700 Subject: [PATCH] Add support for spark-submit packages and repositories (#1040) * Add support for spark-submit packages and repositories - added support for packages, exclude-packages, and repositories spark-submit parameters - added these new parameters to the user guide documentation * address review comments * address review comments --- docs/user-guide.md | 19 +++++++++++++++++++ .../sparkoperator.k8s.io/v1beta2/types.go | 14 ++++++++++++++ pkg/controller/sparkapplication/submission.go | 9 +++++++++ 3 files changed, 42 insertions(+) diff --git a/docs/user-guide.md b/docs/user-guide.md index ca1ee95343..dde7b5dae9 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -99,6 +99,25 @@ spec: - gs://spark-data/data-file-2.txt ``` +It's also possible to specify additional jars to obtain from a remote repository by adding maven coordinates to `.spec.deps.packages`. Conflicting transitive dependencies can be addressed by adding to the exclusion list with `.spec.deps.excludePackages`. Additional repositories can be added to the `.spec.deps.repositories` list. These directly translate to the `spark-submit` parameters `--packages`, `--exclude-packages`, and `--repositories`. + +NOTE: +- Each package in the `packages` list must be of the form "groupId:artifactId:version" +- Each package in the `excludePackages` list must be of the form "groupId:artifactId" + +The following example shows how to use these parameters. 
+ +```yaml +spec: + deps: + repositories: + - https://repository.example.com/prod + packages: + - com.example:some-package:1.0.0 + excludePackages: + - com.example:other-package +``` + ### Specifying Spark Configuration There are two ways to add Spark configuration: setting individual Spark configuration properties using the optional field `.spec.sparkConf` or mounting a special Kubernetes ConfigMap storing Spark configuration files (e.g. `spark-defaults.conf`, `spark-env.sh`, `log4j.properties`) using the optional field `.spec.sparkConfigMap`. If `.spec.sparkConfigMap` is used, additionally to mounting the ConfigMap into the driver and executors, the operator additionally sets the environment variable `SPARK_CONF_DIR` to point to the mount path of the ConfigMap. diff --git a/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go b/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go index 4243231a20..acb0b5cb7b 100644 --- a/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go +++ b/pkg/apis/sparkoperator.k8s.io/v1beta2/types.go @@ -408,6 +408,20 @@ type Dependencies struct { // PyFiles is a list of Python files the Spark application depends on. // +optional PyFiles []string `json:"pyFiles,omitempty"` + // Packages is a list of maven coordinates of jars to include on the driver and executor + // classpaths. This will search the local maven repo, then maven central and any additional + // remote repositories given by the "repositories" option. + // Each package should be of the form "groupId:artifactId:version". + // +optional + Packages []string `json:"packages,omitempty"` + // ExcludePackages is a list of "groupId:artifactId", to exclude while resolving the + // dependencies provided in Packages to avoid dependency conflicts. + // +optional + ExcludePackages []string `json:"excludePackages,omitempty"` + // Repositories is a list of additional remote repositories to search for the maven coordinates + // given with the "packages" option. 
+ // +optional + Repositories []string `json:"repositories,omitempty"` } // SparkPodSpec defines common things that can be customized for a Spark driver or executor pod. diff --git a/pkg/controller/sparkapplication/submission.go b/pkg/controller/sparkapplication/submission.go index 5531b43ad7..74fd3b805a 100644 --- a/pkg/controller/sparkapplication/submission.go +++ b/pkg/controller/sparkapplication/submission.go @@ -233,6 +233,15 @@ func addDependenciesConfOptions(app *v1beta2.SparkApplication) []string { if len(app.Spec.Deps.PyFiles) > 0 { depsConfOptions = append(depsConfOptions, "--py-files", strings.Join(app.Spec.Deps.PyFiles, ",")) } + if len(app.Spec.Deps.Packages) > 0 { + depsConfOptions = append(depsConfOptions, "--packages", strings.Join(app.Spec.Deps.Packages, ",")) + } + if len(app.Spec.Deps.ExcludePackages) > 0 { + depsConfOptions = append(depsConfOptions, "--exclude-packages", strings.Join(app.Spec.Deps.ExcludePackages, ",")) + } + if len(app.Spec.Deps.Repositories) > 0 { + depsConfOptions = append(depsConfOptions, "--repositories", strings.Join(app.Spec.Deps.Repositories, ",")) + } return depsConfOptions }