Skip to content

Commit

Permalink
[CI] [GCP Cluster launcher] Use single SSH key for all tests (#40677)
Browse files Browse the repository at this point in the history
Previously a new key would be generated by the cluster launcher for each test run. This caused us to hit the limit for the number of SSH keys per project. This PR uses a single SSH key for all tests by storing a key in a GCS bucket and downloading it to each test cluster at startup.

Related issue number
Closes #40635

---------

Signed-off-by: Archit Kulkarni <architkulkarni@users.noreply.github.com>
Signed-off-by: Archit Kulkarni <archit@anyscale.com>
  • Loading branch information
architkulkarni authored Nov 13, 2023
1 parent 531cea8 commit dc945e0
Showing 1 changed file with 30 additions and 4 deletions.
34 changes: 30 additions & 4 deletions python/ray/autoscaler/launch_and_verify_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import boto3
import yaml
from google.cloud import storage

import ray

Expand Down Expand Up @@ -140,7 +141,7 @@ def override_docker_image(config_yaml, docker_image):
config_yaml["docker"] = docker_config


def download_ssh_key():
def download_ssh_key_aws():
"""Download the ssh key from the S3 bucket to the local machine."""
print("======================================")
print("Downloading ssh key...")
Expand All @@ -161,6 +162,32 @@ def download_ssh_key():
os.chmod(local_key_path, 0o400)


def download_ssh_key_gcp():
    """Download the ssh key from the Google Cloud Storage bucket to the local machine.

    The key is read from a fixed bucket/blob and written to
    ``~/.ssh/<key_name>``; the file is then restricted to owner-read-only
    (mode 0o400).

    Raises:
        FileNotFoundError: If the key blob does not exist in the bucket.
    """
    print("======================================")
    print("Downloading ssh key from GCP...")

    # Initialize the GCP storage client
    client = storage.Client()

    # Set the name of the GCS bucket and the blob (key) to download
    bucket_name = "gcp-cluster-launcher-release-test-ssh-keys"
    key_name = "ray-autoscaler_gcp_us-west1_anyscale-bridge-cd812d38_ubuntu_0.pem"

    # Get the bucket and blob
    bucket = client.get_bucket(bucket_name)
    # Bucket.get_blob() returns None (it does not raise) when the blob is
    # missing, so fail here with a clear message instead of an AttributeError
    # on the download call below.
    blob = bucket.get_blob(key_name)
    if blob is None:
        raise FileNotFoundError(
            f"SSH key blob '{key_name}' not found in GCS bucket '{bucket_name}'."
        )

    # Download the blob to a local file
    local_key_path = os.path.expanduser(f"~/.ssh/{key_name}")
    os.makedirs(os.path.dirname(local_key_path), exist_ok=True)

    # A previous run leaves the key read-only (0o400); make it writable again
    # so that re-downloading over it does not fail with PermissionError.
    if os.path.exists(local_key_path):
        os.chmod(local_key_path, 0o600)
    blob.download_to_filename(local_key_path)

    # Set permissions on the key file
    os.chmod(local_key_path, 0o400)


def cleanup_cluster(cluster_config):
"""
Clean up the cluster using the given cluster configuration file.
Expand Down Expand Up @@ -318,10 +345,9 @@ def run_ray_commands(cluster_config, retries, no_config_cache, num_expected_node

provider_type = config_yaml.get("provider", {}).get("type")
if provider_type == "aws":
download_ssh_key()
download_ssh_key_aws()
elif provider_type == "gcp":
print("======================================")
print("GCP provider detected. Skipping ssh key download step.")
download_ssh_key_gcp()
# Get the active account email
account_email = (
subprocess.run(
Expand Down

0 comments on commit dc945e0

Please sign in to comment.