@@ -333,110 +333,6 @@ def get_total_chips_requested_from_args(
333
333
return num_chips
334
334
335
335
336
def update_gke_cluster_with_clouddns(args) -> int:
  """Switch an existing GKE cluster over to Cloud DNS.

  Issues a `gcloud container clusters update` for the cluster named in `args`,
  requesting VPC-scoped Cloud DNS with a `<cluster>-domain` DNS domain.

  Args:
    args: user provided arguments for running the command.

  Returns:
    0 if the update command succeeded, 1 otherwise.
  """
  command_parts = [
      'gcloud container clusters update',
      f' {args.cluster} --project={args.project}',
      f' --region={zone_to_region(args.zone)}',
      ' --cluster-dns=clouddns',
      ' --cluster-dns-scope=vpc',
      f' --cluster-dns-domain={args.cluster}-domain',
      ' --quiet',
  ]
  update_command = ''.join(command_parts)

  xpk_print('Updating GKE cluster to use Cloud DNS, may take a while!')
  exit_status = run_command_with_updates(
      update_command, 'GKE Cluster Update to enable Cloud DNS', args
  )
  if exit_status == 0:
    return 0

  # Any failure is collapsed to 1; the raw status is only reported in the log.
  xpk_print(f'GKE Cluster Update request returned ERROR {exit_status}')
  return 1
362
-
363
-
364
def upgrade_gke_control_plane_version(args, default_rapid_gke_version) -> int:
  """Upgrade the cluster's control plane ahead of the Cloud DNS node pool update.

  Runs `gcloud container clusters upgrade ... --master` pinned to the given
  version.

  Args:
    args: user provided arguments for running the command.
    default_rapid_gke_version: Rapid default version for the upgrade.

  Returns:
    0 if the upgrade command succeeded, 1 otherwise.
  """
  command_parts = [
      'gcloud container clusters upgrade',
      f' {args.cluster} --project={args.project}',
      f' --region={zone_to_region(args.zone)}',
      f' --cluster-version={default_rapid_gke_version}',
      ' --master',
      ' --quiet',
  ]
  upgrade_command = ''.join(command_parts)

  xpk_print("Updating GKE cluster's control plane version, may take a while!")
  exit_status = run_command_with_updates(
      upgrade_command,
      'GKE Cluster control plane version update to enable Cloud DNS',
      args,
  )
  if exit_status == 0:
    return 0

  # Any failure is collapsed to 1; the raw status is only reported in the log.
  xpk_print(
      "GKE cluster's control plane version update request returned"
      f' ERROR {exit_status}'
  )
  return 1
395
-
396
-
397
def upgrade_gke_nodepools_version(args, default_rapid_gke_version) -> int:
  """Upgrade every node pool in the cluster to the default rapid GKE version.

  Upgrading recreates the nodes. One `gcloud container clusters upgrade`
  command is built per node pool and the whole set is handed to `run_commands`
  as a single batch.

  Args:
    args: user provided arguments for running the command.
    default_rapid_gke_version: Rapid default version for the upgrade.

  Returns:
    0 if successful; otherwise the failing status from listing the node pools
    or the status reported by the batched upgrade.
  """
  node_pools, list_status = get_all_nodepools_programmatic(args)
  if list_status != 0:
    xpk_print('Listing all node pools failed!')
    return list_status

  # Pair each task label with its command so the two can never drift apart.
  upgrades = [
      (
          f'Upgrading node pool {pool}.',
          'gcloud container clusters upgrade'
          f' {args.cluster} --project={args.project}'
          f' --region={zone_to_region(args.zone)}'
          f' --cluster-version={default_rapid_gke_version}'
          f' --node-pool={pool}'
          ' --quiet',
      )
      for pool in node_pools
  ]
  task_names = [task for task, _ in upgrades]
  commands = [cmd for _, cmd in upgrades]

  for task, cmd in upgrades:
    xpk_print(f'To complete {task} we are executing {cmd}')

  worst_status = run_commands(
      commands, 'Update GKE node pools to default RAPID GKE version', task_names
  )
  if worst_status == 0:
    return 0

  xpk_print(
      'GKE node pools update to default RAPID GKE version returned ERROR:'
      f' {worst_status}'
  )
  return worst_status
438
-
439
-
440
336
def set_up_cluster_network_for_gpu (args , system : SystemCharacteristics ) -> int :
441
337
"""Set up GKE Cluster networks, subnets and firewall rules for A3/A3+.
442
338
Note: there are 4 NICs for GPU-GPU bw and 1 NIC for host in an A3 node,
@@ -1019,73 +915,6 @@ def get_all_clusters_programmatic(args) -> tuple[list[str], int]:
1019
915
return raw_cluster_output .splitlines (), 0
1020
916
1021
917
1022
def is_cluster_using_clouddns(args) -> bool:
  """Report whether the cluster already has Cloud DNS enabled.

  Pipes `gcloud container clusters describe` through `grep` for the
  `clusterDns: CLOUD_DNS` marker and treats a zero status as a match.

  Args:
    args: user provided arguments for running the command.

  Returns:
    True if cluster is using CloudDNS and False otherwise.
  """
  describe_and_grep = ''.join([
      f'gcloud container clusters describe {args.cluster}',
      f' --project={args.project} --region={zone_to_region(args.zone)}',
      ' | grep "clusterDns: CLOUD_DNS"',
  ])
  status, _ = run_command_for_value(
      describe_and_grep,
      'Check if Cloud DNS is enabled in cluster describe.',
      args,
  )
  # Any non-zero status is reported as "not using Cloud DNS".
  if status != 0:
    return False

  xpk_print('Cloud DNS is enabled on the cluster, no update needed.')
  return True
1044
-
1045
-
1046
def update_cluster_with_clouddns_if_necessary(args) -> int:
  """Updates a GKE cluster to use CloudDNS, if not enabled already.

  Does nothing when the cluster does not exist yet or already uses Cloud DNS.
  Otherwise it enables Cloud DNS on the cluster, upgrades the control plane to
  the default rapid GKE version, and then upgrades every node pool to that same
  version.

  Args:
    args: user provided arguments for running the command.

  Returns:
    0 if successful (or nothing needed doing) and an error code otherwise.
  """
  all_clusters, return_code = get_all_clusters_programmatic(args)
  # Failure checks use `!= 0`, matching the other checks in this file, so a
  # negative status is never mistaken for success.
  if return_code != 0:
    xpk_print('Listing all clusters failed!')
    return 1

  # Nothing to update for a cluster that does not exist yet.
  if args.cluster not in all_clusters:
    return 0

  # If cluster is already using clouddns, no update necessary!
  if is_cluster_using_clouddns(args):
    return 0

  cluster_update_return_code = update_gke_cluster_with_clouddns(args)
  if cluster_update_return_code != 0:
    xpk_print('Updating GKE cluster to use CloudDNS failed!')
    return cluster_update_return_code

  # Find default rapid control plane version and update the control plane to
  # the same.
  server_config_return_code, gke_server_config = get_gke_server_config(args)
  if server_config_return_code != 0:
    # NOTE(review): this path calls xpk_exit rather than returning the code,
    # unlike every other failure in this function — presumably it terminates
    # the process; confirm that is intended.
    xpk_exit(server_config_return_code)

  rapid_version = gke_server_config.default_rapid_gke_version
  upgrade_master_return_code = upgrade_gke_control_plane_version(
      args, rapid_version
  )
  if upgrade_master_return_code != 0:
    xpk_print("Updating GKE cluster's control plane upgrade failed!")
    return upgrade_master_return_code

  # Upgrade nodepools version after the master upgrade.
  node_pool_update_code = upgrade_gke_nodepools_version(args, rapid_version)
  if node_pool_update_code != 0:
    xpk_print('Upgrading nodepools version failed!')
    return node_pool_update_code
  return 0
1087
-
1088
-
1089
918
def get_nodepool_zone (args , nodepool_name ) -> tuple [int , str ]:
1090
919
"""Return zone in which nodepool exists in the cluster.
1091
920
0 commit comments