Skip to content

Commit 66609a2

Browse files
committed
[SPARK-32067][k8s] Give pod template configmap a unique name
The pod template configmap always had the same name for each job submitted. This means if you schedule 2 spark jobs in the same namespace there will be conflicts.
1 parent 9b88aca commit 66609a2

File tree

6 files changed

+61
-13
lines changed

6 files changed

+61
-13
lines changed

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ private[spark] object Constants {
8686
val DEFAULT_EXECUTOR_CONTAINER_NAME = "spark-kubernetes-executor"
8787
val MEMORY_OVERHEAD_MIN_MIB = 384L
8888
val NON_JVM_MEMORY_OVERHEAD_FACTOR = 0.4d
89+
val KUBERNETES_MAX_NAME_LENGTH = 63
8990

9091
// Hadoop Configuration
9192
val HADOOP_CONF_VOLUME = "hadoop-properties"

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ private[spark] class BasicExecutorFeatureStep(
8383
// hostname must be no longer than 63 characters, so take the last 63 characters of the pod
8484
// name as the hostname. This preserves uniqueness since the end of name contains
8585
// executorId
86-
val hostname = name.substring(Math.max(0, name.length - 63))
86+
val hostname = name.substring(Math.max(0, name.length - KUBERNETES_MAX_NAME_LENGTH))
8787
// Remove non-word characters from the start of the hostname
8888
.replaceAll("^[^\\w]+", "")
8989
// Replace dangerous characters in the remaining string with a safe alternative.

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,13 @@ private[spark] class DriverServiceFeatureStep(
3939
"managed via a Kubernetes service.")
4040

4141
private val preferredServiceName = s"${kubernetesConf.resourceNamePrefix}$DRIVER_SVC_POSTFIX"
42-
private val resolvedServiceName = if (preferredServiceName.length <= MAX_SERVICE_NAME_LENGTH) {
42+
private val resolvedServiceName = if (preferredServiceName.length <= KUBERNETES_MAX_NAME_LENGTH) {
4343
preferredServiceName
4444
} else {
4545
val randomServiceId = KubernetesUtils.uniqueID(clock = clock)
4646
val shorterServiceName = s"spark-$randomServiceId$DRIVER_SVC_POSTFIX"
4747
logWarning(s"Driver's hostname would preferably be $preferredServiceName, but this is " +
48-
s"too long (must be <= $MAX_SERVICE_NAME_LENGTH characters). Falling back to use " +
48+
s"too long (must be <= $KUBERNETES_MAX_NAME_LENGTH characters). Falling back to use " +
4949
s"$shorterServiceName as the driver service's name.")
5050
shorterServiceName
5151
}
@@ -99,5 +99,4 @@ private[spark] object DriverServiceFeatureStep {
9999
val DRIVER_BIND_ADDRESS_KEY = config.DRIVER_BIND_ADDRESS.key
100100
val DRIVER_HOST_KEY = config.DRIVER_HOST_ADDRESS.key
101101
val DRIVER_SVC_POSTFIX = "-driver-svc"
102-
val MAX_SERVICE_NAME_LENGTH = 63
103102
}

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,39 @@ import java.nio.charset.StandardCharsets
2222
import com.google.common.io.Files
2323
import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, ContainerBuilder, HasMetadata, PodBuilder}
2424

25-
import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod}
25+
import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesUtils, SparkPod}
2626
import org.apache.spark.deploy.k8s.Config._
2727
import org.apache.spark.deploy.k8s.Constants._
28+
import org.apache.spark.internal.Logging
29+
import org.apache.spark.util.{Clock, SystemClock}
2830

29-
private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf)
30-
extends KubernetesFeatureConfigStep {
31+
private[spark] class PodTemplateConfigMapStep
32+
(conf: KubernetesConf, clock: Clock = new SystemClock())
33+
extends KubernetesFeatureConfigStep with Logging {
3134

3235
private val hasTemplate = conf.contains(KUBERNETES_EXECUTOR_PODTEMPLATE_FILE)
3336

37+
private val preferredConfigmapName = s"${conf.resourceNamePrefix}-$POD_TEMPLATE_CONFIGMAP"
38+
private val resolvedConfigmapName = if (preferredConfigmapName.length <= KUBERNETES_MAX_NAME_LENGTH)
39+
{
40+
preferredConfigmapName
41+
} else {
42+
val randomServiceId = KubernetesUtils.uniqueID(clock = clock)
43+
val shorterTemplateConfigmapName = s"spark-$randomServiceId-$POD_TEMPLATE_CONFIGMAP"
44+
logWarning(s"The pod template configmap name preferably be $preferredConfigmapName," +
45+
s"but this is too long (must be <= $KUBERNETES_MAX_NAME_LENGTH characters). Falling back to" +
46+
s"use $shorterTemplateConfigmapName as the pod template configmap name.")
47+
shorterTemplateConfigmapName
48+
}
49+
3450
def configurePod(pod: SparkPod): SparkPod = {
3551
if (hasTemplate) {
3652
val podWithVolume = new PodBuilder(pod.pod)
3753
.editSpec()
3854
.addNewVolume()
3955
.withName(POD_TEMPLATE_VOLUME)
4056
.withNewConfigMap()
41-
.withName(POD_TEMPLATE_CONFIGMAP)
57+
.withName(resolvedConfigmapName)
4258
.addNewItem()
4359
.withKey(POD_TEMPLATE_KEY)
4460
.withPath(EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME)
@@ -76,7 +92,7 @@ private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf)
7692
val podTemplateString = Files.toString(new File(podTemplateFile), StandardCharsets.UTF_8)
7793
Seq(new ConfigMapBuilder()
7894
.withNewMetadata()
79-
.withName(POD_TEMPLATE_CONFIGMAP)
95+
.withName(resolvedConfigmapName)
8096
.endMetadata()
8197
.addToData(POD_TEMPLATE_KEY, podTemplateString)
8298
.build())
@@ -85,3 +101,4 @@ private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf)
85101
}
86102
}
87103
}
104+

resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ import org.apache.spark.util.ManualClock
3333
class DriverServiceFeatureStepSuite extends SparkFunSuite {
3434

3535
private val LONG_RESOURCE_NAME_PREFIX =
36-
"a" * (DriverServiceFeatureStep.MAX_SERVICE_NAME_LENGTH -
37-
DriverServiceFeatureStep.DRIVER_SVC_POSTFIX.length + 1)
36+
"a" * (KUBERNETES_MAX_NAME_LENGTH - DriverServiceFeatureStep.DRIVER_SVC_POSTFIX.length + 1)
3837
private val DRIVER_LABELS = Map(
3938
"label1key" -> "label1value",
4039
"label2key" -> "label2value")

resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,16 @@ import java.nio.file.Files
2121

2222
import io.fabric8.kubernetes.api.model.ConfigMap
2323

24+
import org.apache.spark.deploy.k8s.Constants.{KUBERNETES_MAX_NAME_LENGTH, POD_TEMPLATE_CONFIGMAP}
2425
import org.apache.spark.{SparkConf, SparkFunSuite}
2526
import org.apache.spark.deploy.k8s._
2627
import org.apache.spark.util.Utils
2728

2829
class PodTemplateConfigMapStepSuite extends SparkFunSuite {
2930

31+
private val LONG_RESOURCE_NAME_PREFIX =
32+
"a" * (KUBERNETES_MAX_NAME_LENGTH - POD_TEMPLATE_CONFIGMAP.length + 1)
33+
3034
test("Do nothing when executor template is not specified") {
3135
val conf = KubernetesTestConf.createDriverConf()
3236
val step = new PodTemplateConfigMapStep(conf)
@@ -56,8 +60,9 @@ class PodTemplateConfigMapStepSuite extends SparkFunSuite {
5660

5761
assert(configuredPod.pod.getSpec.getVolumes.size() === 1)
5862
val volume = configuredPod.pod.getSpec.getVolumes.get(0)
63+
val generatedResourceName = s"${kubernetesConf.resourceNamePrefix}-$POD_TEMPLATE_CONFIGMAP"
5964
assert(volume.getName === Constants.POD_TEMPLATE_VOLUME)
60-
assert(volume.getConfigMap.getName === Constants.POD_TEMPLATE_CONFIGMAP)
65+
assert(volume.getConfigMap.getName === generatedResourceName)
6166
assert(volume.getConfigMap.getItems.size() === 1)
6267
assert(volume.getConfigMap.getItems.get(0).getKey === Constants.POD_TEMPLATE_KEY)
6368
assert(volume.getConfigMap.getItems.get(0).getPath ===
@@ -70,7 +75,7 @@ class PodTemplateConfigMapStepSuite extends SparkFunSuite {
7075

7176
val resources = step.getAdditionalKubernetesResources()
7277
assert(resources.size === 1)
73-
assert(resources.head.getMetadata.getName === Constants.POD_TEMPLATE_CONFIGMAP)
78+
assert(resources.head.getMetadata.getName === generatedResourceName)
7479
assert(resources.head.isInstanceOf[ConfigMap])
7580
val configMap = resources.head.asInstanceOf[ConfigMap]
7681
assert(configMap.getData.size() === 1)
@@ -84,4 +89,31 @@ class PodTemplateConfigMapStepSuite extends SparkFunSuite {
8489
(Constants.EXECUTOR_POD_SPEC_TEMPLATE_MOUNTPATH + "/" +
8590
Constants.EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME))
8691
}
92+
93+
test("SPARK-32067: Long prefixes should switch to using a generated unique name.") {
94+
val templateFile = Files.createTempFile("pod-template", "yml").toFile
95+
templateFile.deleteOnExit()
96+
97+
val sparkConf = new SparkConf(false)
98+
.set(Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE, templateFile.getAbsolutePath)
99+
val kubernetesConf = KubernetesTestConf.createDriverConf(
100+
sparkConf = sparkConf,
101+
resourceNamePrefix = Some(LONG_RESOURCE_NAME_PREFIX))
102+
103+
Utils.tryWithResource(new PrintWriter(templateFile)) { writer =>
104+
writer.write("pod-template-contents")
105+
}
106+
107+
val step = new PodTemplateConfigMapStep(kubernetesConf)
108+
val configuredPod = step.configurePod(SparkPod.initialPod())
109+
110+
assert(configuredPod.pod.getSpec.getVolumes.size() === 1)
111+
val volume = configuredPod.pod.getSpec.getVolumes.get(0)
112+
assert(!volume.getConfigMap.getName.startsWith(kubernetesConf.resourceNamePrefix))
113+
val resources = step.getAdditionalKubernetesResources()
114+
assert(resources.size === 1)
115+
assert(!resources.head.getMetadata.getName.startsWith(kubernetesConf.resourceNamePrefix))
116+
assert(volume.getConfigMap.getName === resources.head.getMetadata.getName)
117+
assert(resources.head.getMetadata.getName.length <= KUBERNETES_MAX_NAME_LENGTH)
118+
}
87119
}

0 commit comments

Comments
 (0)