From f5fd71894ec7421918beda5279b42f3b6b13870b Mon Sep 17 00:00:00 2001
From: Chao Sun
Date: Tue, 28 May 2024 12:54:34 -0700
Subject: [PATCH] comments

---
 .../apache/spark/deploy/SparkSubmitArguments.scala |  3 ++-
 docs/configuration.md                              | 11 +++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 096c35079b22e..01ca56c38a963 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -125,7 +125,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
    * When this is called, `sparkProperties` is already filled with configs from the latter.
    */
   private def mergeDefaultSparkProperties(): Unit = {
-    // Honor --conf before the defaults file
+    // Honor --conf before the specified properties file and defaults file
     defaultSparkProperties.foreach { case (k, v) =>
       if (!sparkProperties.contains(k)) {
         sparkProperties(k) = v
@@ -133,6 +133,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     }
 
     // Also load properties from `spark-defaults.conf` if they do not exist in the properties file
+    // and --conf list
     val defaultSparkConf = Utils.getDefaultPropertiesFile(env)
     Option(defaultSparkConf).foreach { filename =>
       val properties = Utils.getPropertiesFromFile(filename)
diff --git a/docs/configuration.md b/docs/configuration.md
index ecd9cd75487ff..aaaaca05341d1 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -111,12 +111,15 @@ each line consists of a key and a value separated by whitespace. For example:
     spark.eventLog.enabled           true
     spark.serializer                 org.apache.spark.serializer.KryoSerializer
 
+In addition, a property file with Spark configurations can be passed to `bin/spark-submit` via
+the `--properties-file` parameter.
+
 Any values specified as flags or in the properties file will be passed on to the application
 and merged with those specified through SparkConf. Properties set directly on the SparkConf
-take highest precedence, then flags passed to `spark-submit` or `spark-shell`, then options
-in the `spark-defaults.conf` file. A few configuration keys have been renamed since earlier
-versions of Spark; in such cases, the older key names are still accepted, but take lower
-precedence than any instance of the newer key.
+take the highest precedence, then those through `--conf` flags or `--properties-file` passed to
+`spark-submit` or `spark-shell`, then options in the `spark-defaults.conf` file. A few
+configuration keys have been renamed since earlier versions of Spark; in such cases, the older
+key names are still accepted, but take lower precedence than any instance of the newer key.
 
 Spark properties mainly can be divided into two kinds: one is related to deploy, like
 "spark.driver.memory", "spark.executor.instances", this kind of properties may not be affected when
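
For reviewers, a minimal standalone sketch of the precedence order this patch documents: --conf flags win over the --properties-file file, which wins over spark-defaults.conf, with each lower-precedence source only filling keys still unset. This is not Spark's actual code; the object name and the sample keys and values below are made up for illustration, and only the fill-if-absent guard mirrors mergeDefaultSparkProperties.

// Hypothetical illustration (not part of the patch) of the merge order:
//   --conf  >  --properties-file  >  spark-defaults.conf
object PrecedenceSketch {
  def main(args: Array[String]): Unit = {
    val confFlags = Map(                       // simulated --conf k=v flags
      "spark.eventLog.enabled" -> "false")
    val propertiesFile = Map(                  // simulated --properties-file contents
      "spark.eventLog.enabled" -> "true",
      "spark.serializer" -> "org.apache.spark.serializer.KryoSerializer")
    val sparkDefaults = Map(                   // simulated conf/spark-defaults.conf
      "spark.serializer" -> "org.apache.spark.serializer.JavaSerializer",
      "spark.driver.memory" -> "2g")

    // Fold from highest to lowest precedence; a lower-precedence source only
    // contributes keys the accumulator does not already contain, mirroring the
    // `if (!sparkProperties.contains(k))` guard in mergeDefaultSparkProperties.
    val merged = List(confFlags, propertiesFile, sparkDefaults)
      .foldLeft(Map.empty[String, String]) { (acc, source) =>
        acc ++ source.filter { case (k, _) => !acc.contains(k) }
      }

    merged.toSeq.sorted.foreach { case (k, v) => println(s"$k=$v") }
    // spark.driver.memory=2g                 (only set in spark-defaults.conf)
    // spark.eventLog.enabled=false           (--conf wins over the properties file)
    // spark.serializer=...KryoSerializer     (properties file wins over the defaults file)
  }
}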