[SPARK-22839][K8S] Remove the use of init-container for downloading remote dependencies #20669
Changes from all commits: 2fefd0e, 431a216, 1e63ecb, 71f4158, 9985ee2, a52110a, 08def2c, f8f42f0
New file: examples/src/main/scala/org/apache/spark/examples/SparkRemoteFileTest.scala

@@ -0,0 +1,48 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// scalastyle:off println
package org.apache.spark.examples

import java.io.File

import org.apache.spark.SparkFiles
import org.apache.spark.sql.SparkSession

/** Usage: SparkRemoteFileTest [file] */
object SparkRemoteFileTest {

    [Review thread on this line]
    Reviewer: Hm, what is the purpose of this specific example?
    Author: To test the presence of a remote file being mounted on the executors via the spark-submit being run by the driver. Should I add a Javadoc (

  def main(args: Array[String]) {
    if (args.length < 1) {
      System.err.println("Usage: SparkRemoteFileTest <file>")
      System.exit(1)
    }
    val spark = SparkSession
      .builder()
      .appName("SparkRemoteFileTest")
      .getOrCreate()
    val sc = spark.sparkContext
    val rdd = sc.parallelize(Seq(1)).map(_ => {
      val localLocation = SparkFiles.get(args(0))
      println(s"${args(0)} is stored at: $localLocation")
      new File(localLocation).isFile
    })
    val truthCheck = rdd.collect().head
    println(s"Mounting of ${args(0)} was $truthCheck")
    spark.stop()
  }
}
// scalastyle:on println
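For context, a sketch of how this test could be exercised on Kubernetes (my illustration, not from this PR; the API server address, image name, jar path, and file URL below are placeholders):

    bin/spark-submit \
      --master k8s://https://<api-server-host>:<port> \
      --deploy-mode cluster \
      --class org.apache.spark.examples.SparkRemoteFileTest \
      --conf spark.kubernetes.container.image=<spark-image> \
      --files https://example.com/data/test.txt \
      <path-to-examples-jar> test.txt

With the init-container gone, the remote file passed via --files is expected to be downloaded by spark-submit running inside the driver, and the example then verifies on an executor that SparkFiles.get resolves it to a real local file.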
Changes to Config.scala (private[spark] object Config):
@@ -79,6 +79,12 @@ private[spark] object Config extends Logging {
       .stringConf
       .createOptional

+  val KUBERNETES_DRIVER_SUBMIT_CHECK =
+    ConfigBuilder("spark.kubernetes.submitInDriver")
+      .internal()

    [Review thread on this line]
    Reviewer: Nit: The indentation looks slightly off here.

+      .booleanConf
+      .createOptional
+
   val KUBERNETES_EXECUTOR_LIMIT_CORES =
     ConfigBuilder("spark.kubernetes.executor.limit.cores")
       .doc("Specify the hard cpu limit for each executor pod")
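A reading aid (my sketch, assuming Spark's plain SparkConf API; isSubmitInDriver is a hypothetical helper, not code from this PR): since spark.kubernetes.submitInDriver is internal and optional, driver-side code would presumably treat an unset value as false.

    import org.apache.spark.SparkConf

    // Sketch: detect whether spark-submit is running inside the driver pod.
    // The key is internal; end users are not expected to set it themselves.
    def isSubmitInDriver(conf: SparkConf): Boolean =
      conf.getBoolean("spark.kubernetes.submitInDriver", defaultValue = false)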
@@ -135,73 +141,6 @@ private[spark] object Config extends Logging {
       .checkValue(interval => interval > 0, s"Logging interval must be a positive time value.")
       .createWithDefaultString("1s")

-  val JARS_DOWNLOAD_LOCATION =
-    ConfigBuilder("spark.kubernetes.mountDependencies.jarsDownloadDir")
-      .doc("Location to download jars to in the driver and executors. When using " +
-        "spark-submit, this directory must be empty and will be mounted as an empty directory " +
-        "volume on the driver and executor pod.")
-      .stringConf
-      .createWithDefault("/var/spark-data/spark-jars")
-
-  val FILES_DOWNLOAD_LOCATION =
-    ConfigBuilder("spark.kubernetes.mountDependencies.filesDownloadDir")
-      .doc("Location to download files to in the driver and executors. When using " +
-        "spark-submit, this directory must be empty and will be mounted as an empty directory " +
-        "volume on the driver and executor pods.")
-      .stringConf
-      .createWithDefault("/var/spark-data/spark-files")
-
-  val INIT_CONTAINER_IMAGE =
-    ConfigBuilder("spark.kubernetes.initContainer.image")
-      .doc("Image for the driver and executor's init-container for downloading dependencies.")
-      .fallbackConf(CONTAINER_IMAGE)
-
-  val INIT_CONTAINER_MOUNT_TIMEOUT =
-    ConfigBuilder("spark.kubernetes.mountDependencies.timeout")
-      .doc("Timeout before aborting the attempt to download and unpack dependencies from remote " +
-        "locations into the driver and executor pods.")
-      .timeConf(TimeUnit.SECONDS)
-      .createWithDefault(300)
-
-  val INIT_CONTAINER_MAX_THREAD_POOL_SIZE =
-    ConfigBuilder("spark.kubernetes.mountDependencies.maxSimultaneousDownloads")
-      .doc("Maximum number of remote dependencies to download simultaneously in a driver or " +
-        "executor pod.")
-      .intConf
-      .createWithDefault(5)
-
-  val INIT_CONTAINER_REMOTE_JARS =
-    ConfigBuilder("spark.kubernetes.initContainer.remoteJars")
-      .doc("Comma-separated list of jar URIs to download in the init-container. This is " +
-        "calculated from spark.jars.")
-      .internal()
-      .stringConf
-      .createOptional
-
-  val INIT_CONTAINER_REMOTE_FILES =
-    ConfigBuilder("spark.kubernetes.initContainer.remoteFiles")
-      .doc("Comma-separated list of file URIs to download in the init-container. This is " +
-        "calculated from spark.files.")
-      .internal()
-      .stringConf
-      .createOptional
-
-  val INIT_CONTAINER_CONFIG_MAP_NAME =
-    ConfigBuilder("spark.kubernetes.initContainer.configMapName")
-      .doc("Name of the config map to use in the init-container that retrieves submitted files " +
-        "for the executor.")
-      .internal()
-      .stringConf
-      .createOptional
-
-  val INIT_CONTAINER_CONFIG_MAP_KEY_CONF =
-    ConfigBuilder("spark.kubernetes.initContainer.configMapKey")
-      .doc("Key for the entry in the init container config map for submitted files that " +
-        "corresponds to the properties for this init-container.")
-      .internal()
-      .stringConf
-      .createOptional
-
   val KUBERNETES_AUTH_SUBMISSION_CONF_PREFIX =
     "spark.kubernetes.authenticate.submission"
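For orientation (my summary with an illustrative snippet, not code from this PR; the URL is a placeholder): removing these entries means remote dependencies no longer flow through a separate init-container. Jars and files listed in spark.jars / spark.files are instead fetched by spark-submit running inside the driver and served to executors through Spark's ordinary file-distribution path, which the following standard API usage mirrors:

    import org.apache.spark.SparkFiles
    import org.apache.spark.sql.SparkSession

    object RemoteDepsDemo {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().appName("RemoteDepsDemo").getOrCreate()
        // Same mechanism as --files / spark.files at submit time: the driver
        // downloads the file once and executors fetch it on first use.
        spark.sparkContext.addFile("https://example.com/conf/settings.json")
        println(s"local copy: ${SparkFiles.get("settings.json")}")
        spark.stop()
      }
    }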
[Review thread]
Reviewer: Any particular reason why this is tied to this change?
Author: Uhm, this technically could be a separate PR. I was initially piggy-backing off Marcelo's work on being able to build Dockerfiles via -f.
Reviewer: Can we separate this out?
Author: Not a problem.