Skip to content

Commit

Permalink
Merge pull request #446 from spack/runner-pod-node-selection
Browse files Browse the repository at this point in the history
  • Loading branch information
jjnesbitt authored Aug 9, 2023
2 parents c6229ff + 76124fc commit bc5c309
Show file tree
Hide file tree
Showing 17 changed files with 297 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ spec:
operator: In
values: ["spot"]

# Only provision nodes for pods specifying the glr-graviton2 node pool
# Label all provisioned nodes as graviton 2
labels:
spack.io/node-pool: glr-graviton2
spack.io/pipeline: "true"
spack.io/graviton: "2"
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ spec:
operator: In
values: ["spot"]

# Only provision nodes for pods specifying the glr-graviton3 node pool
# Label all provisioned nodes as graviton 3
labels:
spack.io/node-pool: glr-graviton3
spack.io/pipeline: "true"
spack.io/graviton: "3"
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ spec:
operator: In
values: ["spot"]

# Only provision nodes for pods specifying the glr-graviton3 node pool
# Only provision nodes for pods specifying the pcluster-amzn2-glr-graviton3 node pool
labels:
spack.io/node-pool: pcluster-amzn2-glr-graviton3
spack.io/pipeline: "true"
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ spec:
operator: In
values: ["spot"]

# Only provision nodes for pods specifying the glr-x86-64-v2 node pool
# Label all provisioned nodes as x86-64-v2
labels:
spack.io/node-pool: glr-x86-64-v2
spack.io/pipeline: "true"
spack.io/x86_64: "v2" # highest supported x86_64 microarch version
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ spec:
operator: In
values: ["spot"]

# Only provision nodes for pods specifying the glr-x86-64-v3 node pool
# Label all provisioned nodes as x86-64-v3
labels:
spack.io/node-pool: glr-x86-64-v3
spack.io/pipeline: "true"
spack.io/x86_64: "v3" # highest supported x86_64 microarch version
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ spec:
operator: In
values: ["spot"]

# Only provision nodes for pods specifying the glr-x86-64-v4 node pool
# Label all provisioned nodes as x86-64-v4
labels:
spack.io/node-pool: glr-x86-64-v4
spack.io/pipeline: "true"
spack.io/x86_64: "v4" # highest supported x86_64 microarch version
36 changes: 31 additions & 5 deletions k8s/production/runners/protected/graviton/2/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,40 @@ spec:
poll_timeout = 600 # ten minutes
service_account = "runner"
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity]
[runners.kubernetes.affinity.node_affinity]
# Schedule this pod on graviton 2 or 3 nodes
[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms]]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/graviton"
operator = "In"
values = ["2", "3"]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/pipeline"
operator = "Exists"
# Weight this pod towards graviton 2 nodes over graviton 3 nodes
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 2
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "spack.io/graviton"
operator = "In"
values = ["2"]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "spack.io/graviton"
operator = "In"
values = ["3"]
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# This takes precedence over the above weights, prioritizing pod packing
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity.pod_affinity]
[[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
weight = 4
[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution.pod_affinity_term]
topology_key = "topology.kubernetes.io/zone"
[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution.pod_affinity_term.label_selector]
Expand Down Expand Up @@ -98,8 +126,6 @@ spec:
"metrics/spack_ci_stack_name" = "$SPACK_CI_STACK_NAME"
"metrics/spack_job_spec_pkg_name" = "$SPACK_JOB_SPEC_PKG_NAME"
"metrics/spack_spec_needs_rebuild" = "$SPACK_SPEC_NEEDS_REBUILD"
[runners.kubernetes.node_selector]
"spack.io/node-pool" = "glr-graviton2"
[[runners.kubernetes.volumes.secret]]
name = "spack-intermediate-ci-signing-key"
Expand Down
19 changes: 15 additions & 4 deletions k8s/production/runners/protected/graviton/3/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,22 @@ spec:
poll_timeout = 600 # ten minutes
service_account = "runner"
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity]
[runners.kubernetes.affinity.node_affinity]
# Schedule this pod only on graviton 3 nodes
[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms]]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/graviton"
operator = "In"
values = ["3"]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/pipeline"
operator = "Exists"
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity.pod_affinity]
[[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
Expand Down Expand Up @@ -98,8 +111,6 @@ spec:
"metrics/spack_ci_stack_name" = "$SPACK_CI_STACK_NAME"
"metrics/spack_job_spec_pkg_name" = "$SPACK_JOB_SPEC_PKG_NAME"
"metrics/spack_spec_needs_rebuild" = "$SPACK_SPEC_NEEDS_REBUILD"
[runners.kubernetes.node_selector]
"spack.io/node-pool" = "glr-graviton3"
[[runners.kubernetes.volumes.secret]]
name = "spack-intermediate-ci-signing-key"
Expand Down
41 changes: 36 additions & 5 deletions k8s/production/runners/protected/x86_64/v2/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,45 @@ spec:
poll_timeout = 600 # ten minutes
service_account = "runner"
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity]
[runners.kubernetes.affinity.node_affinity]
# Schedule this pod on any node with x86_64 >= v2
[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms]]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/x86_64"
operator = "In"
values = ["v2", "v3", "v4"]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/pipeline"
operator = "Exists"
# Weight this pod towards x86-64-v2 nodes first, then x86-64-v3, then x86-64-v4
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 3
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "spack.io/x86_64"
operator = "In"
values = ["v2"]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 2
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "spack.io/x86_64"
operator = "In"
values = ["v3"]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "spack.io/x86_64"
operator = "In"
values = ["v4"]
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity.pod_affinity]
[[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
weight = 4
[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution.pod_affinity_term]
topology_key = "topology.kubernetes.io/zone"
[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution.pod_affinity_term.label_selector]
Expand Down Expand Up @@ -97,8 +130,6 @@ spec:
"metrics/spack_ci_stack_name" = "$SPACK_CI_STACK_NAME"
"metrics/spack_job_spec_pkg_name" = "$SPACK_JOB_SPEC_PKG_NAME"
"metrics/spack_spec_needs_rebuild" = "$SPACK_SPEC_NEEDS_REBUILD"
[runners.kubernetes.node_selector]
"spack.io/node-pool" = "glr-x86-64-v2"
[[runners.kubernetes.volumes.secret]]
name = "spack-intermediate-ci-signing-key"
Expand Down
35 changes: 30 additions & 5 deletions k8s/production/runners/protected/x86_64/v3/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,39 @@ spec:
poll_timeout = 600 # ten minutes
service_account = "runner"
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity]
[runners.kubernetes.affinity.node_affinity]
# Schedule this pod on any node with x86_64 >= v3
[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms]]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/x86_64"
operator = "In"
values = ["v3", "v4"]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/pipeline"
operator = "Exists"
# Weight this pod towards x86-64-v3 nodes over x86-64-v4 nodes
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 2
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "spack.io/x86_64"
operator = "In"
values = ["v3"]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "spack.io/x86_64"
operator = "In"
values = ["v4"]
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity.pod_affinity]
[[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
weight = 4
[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution.pod_affinity_term]
topology_key = "topology.kubernetes.io/zone"
[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution.pod_affinity_term.label_selector]
Expand Down Expand Up @@ -97,8 +124,6 @@ spec:
"metrics/spack_ci_stack_name" = "$SPACK_CI_STACK_NAME"
"metrics/spack_job_spec_pkg_name" = "$SPACK_JOB_SPEC_PKG_NAME"
"metrics/spack_spec_needs_rebuild" = "$SPACK_SPEC_NEEDS_REBUILD"
[runners.kubernetes.node_selector]
"spack.io/node-pool" = "glr-x86-64-v3"
[[runners.kubernetes.volumes.secret]]
name = "spack-intermediate-ci-signing-key"
Expand Down
19 changes: 15 additions & 4 deletions k8s/production/runners/protected/x86_64/v4/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,22 @@ spec:
poll_timeout = 600 # ten minutes
service_account = "runner"
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity]
[runners.kubernetes.affinity.node_affinity]
# Schedule this pod on any node with x86_64 = v4
[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms]]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/x86_64"
operator = "In"
values = ["v4"]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/pipeline"
operator = "Exists"
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity.pod_affinity]
[[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
Expand Down Expand Up @@ -97,8 +110,6 @@ spec:
"metrics/spack_ci_stack_name" = "$SPACK_CI_STACK_NAME"
"metrics/spack_job_spec_pkg_name" = "$SPACK_JOB_SPEC_PKG_NAME"
"metrics/spack_spec_needs_rebuild" = "$SPACK_SPEC_NEEDS_REBUILD"
[runners.kubernetes.node_selector]
"spack.io/node-pool" = "glr-x86-64-v4"
[[runners.kubernetes.volumes.secret]]
name = "spack-intermediate-ci-signing-key"
Expand Down
36 changes: 31 additions & 5 deletions k8s/production/runners/public/graviton/2/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,40 @@ spec:
poll_timeout = 600 # ten minutes
service_account = "runner"
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity]
[runners.kubernetes.affinity.node_affinity]
# Schedule this pod on graviton 2 or 3 nodes
[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms]]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/graviton"
operator = "In"
values = ["2", "3"]
[[runners.kubernetes.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms.match_expressions]]
key = "spack.io/pipeline"
operator = "Exists"
# Weight this pod towards graviton 2 nodes over graviton 3 nodes
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 2
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "spack.io/graviton"
operator = "In"
values = ["2"]
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
[[runners.kubernetes.affinity.node_affinity.preferred_during_scheduling_ignored_during_execution.preference.match_expressions]]
key = "spack.io/graviton"
operator = "In"
values = ["3"]
# Place pod close to other pipeline pods if possible ("pack" the pods tightly)
# This takes precedence over the above weights, prioritizing pod packing
# Docs: https://docs.gitlab.com/runner/executors/kubernetes.html#define-nodes-where-pods-are-scheduled
[runners.kubernetes.affinity.pod_affinity]
[[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution]]
weight = 1
weight = 4
[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution.pod_affinity_term]
topology_key = "topology.kubernetes.io/zone"
[runners.kubernetes.affinity.pod_affinity.preferred_during_scheduling_ignored_during_execution.pod_affinity_term.label_selector]
Expand Down Expand Up @@ -98,8 +126,6 @@ spec:
"metrics/spack_ci_stack_name" = "$SPACK_CI_STACK_NAME"
"metrics/spack_job_spec_pkg_name" = "$SPACK_JOB_SPEC_PKG_NAME"
"metrics/spack_spec_needs_rebuild" = "$SPACK_SPEC_NEEDS_REBUILD"
[runners.kubernetes.node_selector]
"spack.io/node-pool" = "glr-graviton2"
# default image
image: "busybox:1.32.0"
Expand Down
Loading

0 comments on commit bc5c309

Please sign in to comment.