Skip to content

Commit 78d1a31

Browse files
authored
Placement Policy removed from A3-Mega blueprints with --spot (#478)
* Placement Policy removed from A3-Mega blueprints with --spot
* Kueue config updated to support optional topologyName
* a3_mega_spot.yaml file
* tcpx(o) container moved to initContainers
1 parent 757fa35 commit 78d1a31

File tree

13 files changed

+271
-26
lines changed

13 files changed

+271
-26
lines changed

src/xpk/blueprints/a3mega/kueue-xpk-configuration.yaml.tftpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ metadata:
1616
spec:
1717
nodeLabels:
1818
cloud.google.com/gke-accelerator: nvidia-h100-mega-80gb
19-
topologyName: "gke-default"
19+
${tas_name}
2020
---
2121
apiVersion: kueue.x-k8s.io/v1beta1
2222
kind: ClusterQueue

src/xpk/commands/batch.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,13 @@
2626
from ..core.kjob import (
2727
AppProfileDefaults,
2828
JobTemplateDefaults,
29-
Kueue_TAS_annotation,
3029
get_storage_annotations,
3130
prepare_kjob,
3231
)
3332
from ..core.kueue import LOCAL_QUEUE_NAME
3433
from ..utils.console import xpk_exit, xpk_print
3534
from .kind import set_local_cluster_command
36-
from .kjob_common import add_gpu_networking_annotations_to_command
35+
from .kjob_common import add_gpu_networking_annotations_to_command, add_TAS_annotations_to_command
3736

3837

3938
def batch(args: Namespace) -> None:
@@ -68,11 +67,11 @@ def submit_job(args: Namespace) -> None:
6867
'kubectl kjob create slurm'
6968
f' --profile {AppProfileDefaults.NAME.value}'
7069
f' --localqueue {LOCAL_QUEUE_NAME}'
71-
f' --pod-template-annotation {Kueue_TAS_annotation}'
7270
f' --worker-container {JobTemplateDefaults.CONTAINER_NAME.value}'
7371
' --first-node-ip'
7472
)
7573
cmd = add_gpu_networking_annotations_to_command(args, cmd)
74+
cmd = add_TAS_annotations_to_command(args, cmd)
7675

7776
for annotation in get_storage_annotations(args):
7877
cmd += f' --pod-template-annotation {annotation}'

src/xpk/commands/common.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@
1515
"""
1616

1717
from ..core.commands import run_command_with_updates_retry
18+
from ..core.resources import get_cluster_capacity_type, get_cluster_system_characteristics
19+
from ..core.capacity import H100_MEGA_DEVICE_TYPE, CapacityType
1820
from ..core.gcloud_context import zone_to_region
19-
from ..utils.console import xpk_print
21+
from ..utils.console import xpk_print, xpk_exit
2022

2123

2224
def set_cluster_command(args) -> int:
@@ -42,3 +44,32 @@ def set_cluster_command(args) -> int:
4244
if return_code != 0:
4345
xpk_print(f'{task} returned ERROR {return_code}')
4446
return return_code
47+
48+
49+
def is_TAS_possible(args) -> bool:
  """Report whether Kueue TAS (topology-aware scheduling) can be used.

  The decision is based on the cluster's system characteristics and its
  capacity type, both fetched through the cluster lookup helpers. The only
  combination reported as unsupported is an A3-Mega cluster
  (`H100_MEGA_DEVICE_TYPE`) running on spot capacity.

  Args:
    args: user provided arguments for running the command.

  Returns:
    False for A3-Mega clusters on spot capacity, True otherwise.
    If either piece of cluster data is missing, an error is printed and
    `xpk_exit(1)` is called.
  """
  # Both lookups are issued up front, before any validation happens.
  characteristics = get_cluster_system_characteristics(args)
  cluster_capacity = get_cluster_capacity_type(args)

  if characteristics is None:
    xpk_print('system_characteristics data was not found in configmaps.')
    xpk_exit(1)

  if cluster_capacity is None:
    xpk_print('capacity_type data was not found in configmaps.')
    xpk_exit(1)

  # TAS is ruled out only when both conditions hold at the same time.
  a3_mega_on_spot = (
      characteristics.device_type == H100_MEGA_DEVICE_TYPE
      and cluster_capacity == CapacityType.SPOT
  )
  return not a3_mega_on_spot

src/xpk/commands/kjob_common.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@
2424
get_a3mega_pod_template_annotations,
2525
get_a3ultra_pod_template_annotations,
2626
get_a4_pod_template_annotations,
27+
Kueue_TAS_annotation,
2728
)
29+
from .common import is_TAS_possible
2830

2931

3032
def add_gpu_networking_annotations_to_command(args, cmd: str) -> str:
@@ -45,3 +47,10 @@ def add_gpu_networking_annotations_to_command(args, cmd: str) -> str:
4547
cmd += "\\\n".join(flags)
4648

4749
return cmd
50+
51+
52+
def add_TAS_annotations_to_command(args, cmd: str) -> str:
  """Append the Kueue TAS pod-template annotation flag when TAS is usable.

  Args:
    args: user provided arguments for running the command.
    cmd: the kjob command line built so far.

  Returns:
    `cmd` extended with a `--pod-template-annotation` flag carrying
    `Kueue_TAS_annotation` if `is_TAS_possible(args)` is true; otherwise
    `cmd` unchanged.
  """
  if not is_TAS_possible(args):
    return cmd

  return cmd + f" --pod-template-annotation {Kueue_TAS_annotation}"

src/xpk/commands/run.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,13 @@
2525
from ..core.kjob import (
2626
AppProfileDefaults,
2727
JobTemplateDefaults,
28-
Kueue_TAS_annotation,
2928
get_storage_annotations,
3029
prepare_kjob,
3130
)
3231
from ..core.kueue import LOCAL_QUEUE_NAME
3332
from ..utils.console import xpk_exit, xpk_print
3433
from .kind import set_local_cluster_command
35-
from .kjob_common import add_gpu_networking_annotations_to_command
34+
from .kjob_common import add_gpu_networking_annotations_to_command, add_TAS_annotations_to_command
3635

3736

3837
def run(args: Namespace) -> None:
@@ -64,12 +63,12 @@ def submit_job(args: Namespace) -> None:
6463
'kubectl kjob create slurm --profile'
6564
f' {AppProfileDefaults.NAME.value} '
6665
f' --localqueue {LOCAL_QUEUE_NAME} '
67-
f" --pod-template-annotation '{Kueue_TAS_annotation}'"
6866
f' --stream-container {JobTemplateDefaults.CONTAINER_NAME.value}'
6967
f' --worker-container {JobTemplateDefaults.CONTAINER_NAME.value}'
7068
' --wait --rm --first-node-ip'
7169
)
7270
cmd = add_gpu_networking_annotations_to_command(args, cmd)
71+
cmd = add_TAS_annotations_to_command(args, cmd)
7372

7473
for annotation in get_storage_annotations(args):
7574
cmd += f' --pod-template-annotation {annotation}'

src/xpk/commands/workload.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
)
9090
from ..utils.console import get_user_input, xpk_exit, xpk_print
9191
from ..utils.file import write_tmp_file
92+
from .common import is_TAS_possible
9293
from . import cluster_gcluster
9394

9495
WORKLOAD_CREATE_YAML = """apiVersion: jobset.x-k8s.io/v1alpha2
@@ -213,7 +214,7 @@
213214
labels:
214215
xpk.google.com/workload: {args.workload}
215216
annotations:
216-
kueue.x-k8s.io/podset-preferred-topology: "cloud.google.com/gce-topology-host"
217+
{kueue_TAS_annotation}
217218
spec:
218219
priorityClassName: {args.priority}
219220
restartPolicy: Never
@@ -447,13 +448,21 @@ def workload_create(args) -> None:
447448
if return_code != 0:
448449
xpk_exit(return_code)
449450

451+
kueue_TAS_annotation = (
452+
'kueue.x-k8s.io/podset-preferred-topology:'
453+
' "cloud.google.com/gce-topology-host"'
454+
)
455+
if not is_TAS_possible(args):
456+
kueue_TAS_annotation = ''
457+
450458
if system.device_type in cluster_gcluster.supported_device_types:
451459
yml_string = A3_GPU_WORKLOAD_CREATE_YAML.format(
452460
args=args,
453461
container=container,
454462
service_account=XPK_SA,
455463
failure_policy_rules=failure_policy_rules,
456464
pod_failure_policy=pod_failure_policy,
465+
kueue_TAS_annotation=kueue_TAS_annotation,
457466
)
458467

459468
sub_networks = get_cluster_subnetworks(args)

src/xpk/core/blueprint/blueprint_generator.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def generate_a3_mega_blueprint(
180180
a3_megagpu_pool_0 = DeploymentModule(
181181
id="a3_megagpu_pool_0",
182182
source="modules/compute/gke-node-pool",
183-
use=["gke_cluster", gpu_subnets_name, "group_placement_0"],
183+
use=["gke_cluster", gpu_subnets_name],
184184
settings={
185185
"name": f"{cluster_name}-a3-megagpu-pool-0",
186186
"machine_type": system.gce_machine_type,
@@ -197,6 +197,9 @@ def generate_a3_mega_blueprint(
197197
},
198198
outputs=["instructions"],
199199
)
200+
201+
set_placement_policy = capacity_type != CapacityType.SPOT
202+
tas_name = "topologyName: 'gke-default'" if set_placement_policy else ""
200203
num_chips = num_nodes * system.chips_per_vm
201204
workload = DeploymentModule(
202205
id="workload_component_install",
@@ -207,7 +210,10 @@ def generate_a3_mega_blueprint(
207210
"install": True,
208211
"version": "v0.10.0", # TAS feature-gates is enabled in CT
209212
"config_path": f'$(ghpc_stage("{blueprint_name}"))/kueue-xpk-configuration.yaml.tftpl',
210-
"config_template_vars": {"num_chips": num_chips},
213+
"config_template_vars": {
214+
"num_chips": num_chips,
215+
"tas_name": tas_name,
216+
},
211217
},
212218
"jobset": {"install": True, "version": "v0.7.2"},
213219
"apply_manifests": [{
@@ -243,12 +249,16 @@ def generate_a3_mega_blueprint(
243249
primary_vpc,
244250
gpunets,
245251
gke_cluster,
246-
group_placement_0,
247252
a3_megagpu_pool_0,
248253
workload,
249254
workload_configmap,
250255
],
251256
)
257+
258+
if set_placement_policy:
259+
a3_megagpu_pool_0.use.append(group_placement_0.id)
260+
primary_group.modules.append(group_placement_0)
261+
252262
a3_mega_blueprint = Blueprint(
253263
terraform_backend_defaults=self._getblock_terraform_backend(
254264
gcs_bucket, cluster_name, prefix

src/xpk/core/resources.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,3 +236,24 @@ def get_cluster_system_characteristics(args) -> SystemCharacteristics | None:
236236
return system
237237

238238
return None
239+
240+
241+
def get_cluster_capacity_type(args) -> CapacityType | None:
  """Get the cluster's capacity type from its metadata ConfigMap.

  Reads the `capacity_type` entry of the
  `{args.cluster}-{CLUSTER_METADATA_CONFIGMAP}` ConfigMap and resolves it,
  case-insensitively, to the `CapacityType` member of that name.

  Args:
    args: user provided arguments for running the command.

  Returns:
    The CapacityType recorded for the cluster, or None when
    `get_cluster_configmap` returns None or the ConfigMap has no
    `capacity_type` entry.

  Raises:
    KeyError: if the stored value does not name a CapacityType member.
  """
  metadata_configmap_name = f'{args.cluster}-{CLUSTER_METADATA_CONFIGMAP}'
  cluster_config_map = get_cluster_configmap(args, metadata_configmap_name)

  if cluster_config_map is None:
    return None

  capacity_value = cluster_config_map.get('capacity_type')
  if capacity_value is None:
    return None

  # Subscripting CapacityType looks a member up by its name, so the stored
  # value is upper-cased first (e.g. "reservation" -> "RESERVATION").
  return CapacityType[capacity_value.upper()]

src/xpk/core/tests/data/a3_mega.yaml

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,6 @@ deployment_groups:
7070
gvnic_postfix: "-subnet"
7171
gvnic_start_index: 0
7272
outputs: [instructions]
73-
- !DeploymentModule
74-
id: group_placement_0
75-
source: modules/compute/resource-policy
76-
settings:
77-
name: bar-gp-np-0
78-
group_placement_max_distance: 2
7973

8074
- !DeploymentModule
8175
id: a3_megagpu_pool_0
@@ -108,6 +102,7 @@ deployment_groups:
108102
config_path: $(ghpc_stage("xpk-gke-a3-megagpu"))/kueue-xpk-configuration.yaml.tftpl
109103
config_template_vars:
110104
num_chips: 16
105+
tas_name: "topologyName: 'gke-default'"
111106
jobset:
112107
install: true
113108
version: v0.7.2
@@ -127,4 +122,11 @@ deployment_groups:
127122
cluster_config_name: "bar-metadata-configmap",
128123
capacity_type: "reservation",
129124
reservation: "test-reservation",
130-
}
125+
}
126+
127+
- !DeploymentModule
128+
id: group_placement_0
129+
source: modules/compute/resource-policy
130+
settings:
131+
name: bar-gp-np-0
132+
group_placement_max_distance: 2

0 commit comments

Comments
 (0)