
Commit 55f3371

dvogelbacher authored and Mykhailo Shtelma committed
[SPARK-23825][K8S] Requesting memory + memory overhead for pod memory
## What changes were proposed in this pull request?

Kubernetes driver and executor pods should request `memory + memoryOverhead` as their resources instead of just `memory`, see https://issues.apache.org/jira/browse/SPARK-23825.

## How was this patch tested?

Existing unit tests were adapted.

Author: David Vogelbacher <dvogelbacher@palantir.com>

Closes apache#20943 from dvogelbacher/spark-23825.
1 parent 1f9284a commit 55f3371
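
The core of the change is that a single memory quantity, computed as heap plus overhead, is now used for both the container's memory request and its memory limit. Below is a minimal sketch of that pattern, using the fabric8 `QuantityBuilder`/`ContainerBuilder` calls that appear in the diffs; the concrete sizes and the container name are illustrative assumptions, not values taken from this commit.

```scala
import io.fabric8.kubernetes.api.model.{ContainerBuilder, QuantityBuilder}

// Illustrative numbers only; in Spark these come from spark.{driver,executor}.memory
// and the corresponding memoryOverhead setting.
val memoryWithOverheadMiB = 1024 + 384

// One quantity built from memory + overhead, used for both the request and the limit,
// so the scheduler reserves all the memory the pod may actually consume.
val memoryQuantity = new QuantityBuilder(false)
  .withAmount(s"${memoryWithOverheadMiB}Mi")
  .build()

val container = new ContainerBuilder()
  .withName("spark-kubernetes-driver")  // hypothetical name for illustration
  .withNewResources()
    .addToRequests("memory", memoryQuantity)
    .addToLimits("memory", memoryQuantity)
  .endResources()
  .build()
```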

4 files changed: +7 / -11 lines

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStep.scala

Lines changed: 1 addition & 4 deletions
```diff
@@ -93,9 +93,6 @@ private[spark] class BasicDriverConfigurationStep(
       .withAmount(driverCpuCores)
       .build()
     val driverMemoryQuantity = new QuantityBuilder(false)
-      .withAmount(s"${driverMemoryMiB}Mi")
-      .build()
-    val driverMemoryLimitQuantity = new QuantityBuilder(false)
       .withAmount(s"${driverMemoryWithOverheadMiB}Mi")
       .build()
     val maybeCpuLimitQuantity = driverLimitCores.map { limitCores =>
@@ -117,7 +114,7 @@ private[spark] class BasicDriverConfigurationStep(
       .withNewResources()
       .addToRequests("cpu", driverCpuQuantity)
       .addToRequests("memory", driverMemoryQuantity)
-      .addToLimits("memory", driverMemoryLimitQuantity)
+      .addToLimits("memory", driverMemoryQuantity)
       .addToLimits(maybeCpuLimitQuantity.toMap.asJava)
       .endResources()
       .addToArgs("driver")
```
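
For context, `driverMemoryWithOverheadMiB` (and the executor analogue below) is the JVM heap plus a memory overhead. Here is a rough sketch of how such a value is typically derived in Spark, assuming a 10% overhead factor with a 384 MiB floor; the floor matches the "minimum overhead constant" mentioned in the tests below, but both constants are assumptions here rather than values read from this diff.

```scala
// Sketch only, not the exact Spark code.
val MEMORY_OVERHEAD_FACTOR = 0.10   // assumed default overhead fraction
val MEMORY_OVERHEAD_MIN_MIB = 384L  // assumed minimum overhead, matching the 384M in the tests

def memoryWithOverheadMiB(memoryMiB: Long): Long =
  memoryMiB + math.max((MEMORY_OVERHEAD_FACTOR * memoryMiB).toLong, MEMORY_OVERHEAD_MIN_MIB)

// Example: the default 1024 MiB executor heap becomes 1024 + max(102, 384) = 1408 MiB,
// the value asserted in ExecutorPodFactorySuite further down.
memoryWithOverheadMiB(1024)  // 1408
```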

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactory.scala

Lines changed: 1 addition & 4 deletions
```diff
@@ -108,9 +108,6 @@ private[spark] class ExecutorPodFactory(
       SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++
       executorLabels
     val executorMemoryQuantity = new QuantityBuilder(false)
-      .withAmount(s"${executorMemoryMiB}Mi")
-      .build()
-    val executorMemoryLimitQuantity = new QuantityBuilder(false)
       .withAmount(s"${executorMemoryWithOverhead}Mi")
       .build()
     val executorCpuQuantity = new QuantityBuilder(false)
@@ -167,7 +164,7 @@ private[spark] class ExecutorPodFactory(
       .withImagePullPolicy(imagePullPolicy)
       .withNewResources()
       .addToRequests("memory", executorMemoryQuantity)
-      .addToLimits("memory", executorMemoryLimitQuantity)
+      .addToLimits("memory", executorMemoryQuantity)
       .addToRequests("cpu", executorCpuQuantity)
       .endResources()
       .addAllToEnv(executorEnv.asJava)
```

resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStepSuite.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -91,7 +91,7 @@ class BasicDriverConfigurationStepSuite extends SparkFunSuite {
     val resourceRequirements = preparedDriverSpec.driverContainer.getResources
     val requests = resourceRequirements.getRequests.asScala
     assert(requests("cpu").getAmount === "2")
-    assert(requests("memory").getAmount === "256Mi")
+    assert(requests("memory").getAmount === "456Mi")
     val limits = resourceRequirements.getLimits.asScala
     assert(limits("memory").getAmount === "456Mi")
     assert(limits("cpu").getAmount === "4")
```
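
Worked out: with the fix, the driver container's memory request now equals its limit, 456Mi, instead of the bare 256Mi heap. The 200 MiB difference (456 - 256 = 200) is presumably the memory overhead configured by this suite; the exact overhead setting is not visible in this diff, so that attribution is an inference from the asserted numbers.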

resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactorySuite.scala

Lines changed: 4 additions & 2 deletions
```diff
@@ -66,12 +66,14 @@ class ExecutorPodFactorySuite extends SparkFunSuite with BeforeAndAfter with Bef
     assert(executor.getMetadata.getLabels.size() === 3)
     assert(executor.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL) === "1")

-    // There is exactly 1 container with no volume mounts and default memory limits.
-    // Default memory limit is 1024M + 384M (minimum overhead constant).
+    // There is exactly 1 container with no volume mounts and default memory limits and requests.
+    // Default memory limit/request is 1024M + 384M (minimum overhead constant).
     assert(executor.getSpec.getContainers.size() === 1)
     assert(executor.getSpec.getContainers.get(0).getImage === executorImage)
     assert(executor.getSpec.getContainers.get(0).getVolumeMounts.isEmpty)
     assert(executor.getSpec.getContainers.get(0).getResources.getLimits.size() === 1)
+    assert(executor.getSpec.getContainers.get(0).getResources
+      .getRequests.get("memory").getAmount === "1408Mi")
     assert(executor.getSpec.getContainers.get(0).getResources
       .getLimits.get("memory").getAmount === "1408Mi")
```
