Skip to content

Commit 77510c6

Browse files
merrily01srowen
authored andcommitted
[SPARK-29233][K8S] Add regex expression checks for executorEnv…
### What changes were proposed in this pull request? In kubernetes, there are some naming regular expression requirements and restrictions on environment variable names, such as: - In kubernetes version release-1.7 and earlier, the naming rules of pod environment variable names should meet the requirements of regular expressions: [[A-Za-z_] [A-Za-z0-9_]*](https://github.com/kubernetes/kubernetes/blob/release-1.7/staging/src/k8s.io/apimachinery/pkg/util/validation/validation.go#L169) - In kubernetes version release-1.8 and later, the naming rules of pod environment variable names should meet the requirements of regular expressions: [[-. _ A-ZA-Z][-. _ A-ZA-Z0-9].*](https://github.com/kubernetes/kubernetes/blob/release-1.8/staging/src/k8s.io/apimachinery/pkg/util/validation/validation.go#L305) However, in spark on k8s mode, spark should add restrictions on environmental variable names when creating executorEnv. In addition, we need to use regular expressions adapted to the high version of k8s to increase the restrictions on the names of environmental variables. Otherwise, the pod will not be created properly and the spark application will be suspended. To solve the problem above, a regular validation to executorEnv is added and committed.  ### Why are the changes needed? If no validation rules are added, the environment variable names that don't meet the requirements will cause the pod to not be created properly and the application will be suspended. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Add unit tests and manually run. Closes #25920 from merrily01/SPARK-29233. Authored-by: maruilei <maruilei@jd.com> Signed-off-by: Sean Owen <sean.owen@databricks.com>
1 parent 7c5db45 commit 77510c6

File tree

2 files changed

+42
-2
lines changed

2 files changed

+42
-2
lines changed

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import org.apache.spark.SparkConf
2424
import org.apache.spark.deploy.k8s.Config._
2525
import org.apache.spark.deploy.k8s.Constants._
2626
import org.apache.spark.deploy.k8s.submit._
27+
import org.apache.spark.internal.Logging
2728
import org.apache.spark.internal.config.ConfigEntry
2829
import org.apache.spark.util.Utils
2930

@@ -123,7 +124,7 @@ private[spark] class KubernetesExecutorConf(
123124
val appId: String,
124125
val executorId: String,
125126
val driverPod: Option[Pod])
126-
extends KubernetesConf(sparkConf) {
127+
extends KubernetesConf(sparkConf) with Logging {
127128

128129
override val resourceNamePrefix: String = {
129130
get(KUBERNETES_EXECUTOR_POD_NAME_PREFIX).getOrElse(
@@ -148,7 +149,8 @@ private[spark] class KubernetesExecutorConf(
148149
executorCustomLabels ++ presetLabels
149150
}
150151

151-
override def environment: Map[String, String] = sparkConf.getExecutorEnv.toMap
152+
override def environment: Map[String, String] = sparkConf.getExecutorEnv.filter(
153+
p => checkExecutorEnvKey(p._1)).toMap
152154

153155
override def annotations: Map[String, String] = {
154156
KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_EXECUTOR_ANNOTATION_PREFIX)
@@ -166,6 +168,20 @@ private[spark] class KubernetesExecutorConf(
166168
KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, KUBERNETES_EXECUTOR_VOLUMES_PREFIX)
167169
}
168170

171+
private def checkExecutorEnvKey(key: String): Boolean = {
172+
// Pattern for matching an executorEnv key, which meets certain naming rules.
173+
val executorEnvRegex = "[-._a-zA-Z][-._a-zA-Z0-9]*".r
174+
if (executorEnvRegex.pattern.matcher(key).matches()) {
175+
true
176+
} else {
177+
logWarning(s"Invalid key: $key: " +
178+
"a valid environment variable name must consist of alphabetic characters, " +
179+
"digits, '_', '-', or '.', and must not start with a digit." +
180+
s"Regex used for validation is '$executorEnvRegex')")
181+
false
182+
}
183+
}
184+
169185
}
170186

171187
private[spark] object KubernetesConf {

resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ class KubernetesConfSuite extends SparkFunSuite {
4444
"customEnvKey2" -> "customEnvValue2")
4545
private val DRIVER_POD = new PodBuilder().build()
4646
private val EXECUTOR_ID = "executor-id"
47+
private val EXECUTOR_ENV_VARS = Map(
48+
"spark.executorEnv.1executorEnvVars1/var1" -> "executorEnvVars1",
49+
"spark.executorEnv.executorEnvVars2*var2" -> "executorEnvVars2",
50+
"spark.executorEnv.executorEnvVars3_var3" -> "executorEnvVars3",
51+
"spark.executorEnv.executorEnvVars4-var4" -> "executorEnvVars4",
52+
"spark.executorEnv.executorEnvVars5-var5" -> "executorEnvVars5/var5")
4753

4854
test("Resolve driver labels, annotations, secret mount paths, envs, and memory overhead") {
4955
val sparkConf = new SparkConf(false)
@@ -132,4 +138,22 @@ class KubernetesConfSuite extends SparkFunSuite {
132138
assert(conf.secretNamesToMountPaths === SECRET_NAMES_TO_MOUNT_PATHS)
133139
assert(conf.secretEnvNamesToKeyRefs === SECRET_ENV_VARS)
134140
}
141+
142+
test("Verify that executorEnv key conforms to the regular specification") {
143+
val sparkConf = new SparkConf(false)
144+
EXECUTOR_ENV_VARS.foreach { case (key, value) =>
145+
sparkConf.set(key, value)
146+
}
147+
148+
val conf = KubernetesConf.createExecutorConf(
149+
sparkConf,
150+
EXECUTOR_ID,
151+
KubernetesTestConf.APP_ID,
152+
Some(DRIVER_POD))
153+
assert(conf.environment ===
154+
Map(
155+
"executorEnvVars3_var3" -> "executorEnvVars3",
156+
"executorEnvVars4-var4" -> "executorEnvVars4",
157+
"executorEnvVars5-var5" -> "executorEnvVars5/var5"))
158+
}
135159
}

0 commit comments

Comments
 (0)