Merge pull request #220 from runpod/enhanced-deploy

Enhanced deploy
runpod · Nov 13, 2023 · 69fe168 · 69fe168
2 parents 34ac99c + 70bdf37
commit 69fe168
Show file tree

Hide file tree

Showing 11 changed files with 354 additions and 100 deletions.
diff --git a/examples/api/get_endpoints.py b/examples/api/get_endpoints.py
@@ -0,0 +1,8 @@
+""" Get all endpoints from the API """
+
+
+import runpod
+
+endpoints = runpod.get_endpoints()
+
+print(endpoints)
diff --git a/runpod/__init__.py b/runpod/__init__.py
@@ -13,7 +13,7 @@
     get_gpu, get_gpus,
     get_pod, get_pods, create_pod, stop_pod, resume_pod, terminate_pod,
     create_template,
-    create_endpoint
+    get_endpoints, create_endpoint, update_endpoint_template
 )
 from .cli.groups.config.functions import set_credentials, check_credentials, get_credentials
 

diff --git a/runpod/api/ctl_commands.py b/runpod/api/ctl_commands.py
@@ -9,6 +9,7 @@
 from .mutations import user as user_mutations
 from .queries import gpus
 from .queries import pods as pod_queries
+from .queries import endpoints as endpoint_queries
 from .graphql import run_graphql_query
 from .mutations import pods as pod_mutations
 from .mutations import endpoints as endpoint_mutations
@@ -228,6 +229,14 @@ def create_template(
 
     return raw_response["data"]["saveTemplate"]
 
+def get_endpoints() -> dict:
+    '''
+    Get all endpoints
+    '''
+    raw_return = run_graphql_query(endpoint_queries.QUERY_ENDPOINT)
+    cleaned_return = raw_return["data"]["myself"]["endpoints"]
+    return cleaned_return
+
 def create_endpoint(
         name:str, template_id:str, gpu_ids:str="AMPERE_16",
         network_volume_id:str=None, locations:str=None,
@@ -262,3 +271,25 @@ def create_endpoint(
     )
 
     return raw_response["data"]["saveEndpoint"]
+
+
+def update_endpoint_template(
+        endpoint_id:str, template_id:str
+):
+    '''
+    Update an endpoint template
+
+    :param endpoint_id: the id of the endpoint
+    :param template_id: the id of the template to use for the endpoint
+
+    :example:
+
+    >>> endpoint_id = runpod.update_endpoint_template("test", "template_id")
+    '''
+    raw_response = run_graphql_query(
+        endpoint_mutations.update_endpoint_template_mutation(
+            endpoint_id, template_id
+        )
+    )
+
+    return raw_response["data"]["updateEndpointTemplate"]
diff --git a/runpod/api/mutations/endpoints.py b/runpod/api/mutations/endpoints.py
@@ -2,11 +2,12 @@
 
 # pylint: disable=too-many-arguments
 
+
 def generate_endpoint_mutation(
-    name:str, template_id:str, gpu_ids:str="AMPERE_16",
-    network_volume_id:str=None, locations:str=None,
-    idle_timeout:int=5, scaler_type:str="QUEUE_DELAY", scaler_value:int=4,
-    workers_min:int=0, workers_max:int=3
+    name: str, template_id: str, gpu_ids: str = "AMPERE_16",
+    network_volume_id: str = None, locations: str = None,
+    idle_timeout: int = 5, scaler_type: str = "QUEUE_DELAY", scaler_value: int = 4,
+    workers_min: int = 0, workers_max: int = 3
 ):
     """ Generate a string for a GraphQL mutation to create a new endpoint. """
     input_fields = []
@@ -57,3 +58,26 @@ def generate_endpoint_mutation(
         }}
     }}
     """
+
+
+def update_endpoint_template_mutation(
+    endpoint_id: str, template_id: str
+):
+    """ Generate a string for a GraphQL mutation to update an existing endpoint's template. """
+    input_fields = []
+
+    # ------------------------------ Required Fields ----------------------------- #
+    input_fields.append(f'templateId: "{template_id}"')
+    input_fields.append(f'endpointId: "{endpoint_id}"')
+
+    # Format the input fields into a string
+    input_fields_string = ", ".join(input_fields)
+    result = f"""
+    mutation {{
+        updateEndpointTemplate(input: {{{input_fields_string}}}) {{
+            id
+            templateId
+        }}
+    }}
+    """
+    return result
diff --git a/runpod/api/queries/endpoints.py b/runpod/api/queries/endpoints.py
@@ -0,0 +1,36 @@
+""" GraphQL queries for endpoints. """
+
+QUERY_ENDPOINT = """
+query Query {
+  myself {
+    endpoints {
+      aiKey
+      gpuIds
+      id
+      idleTimeout
+      name
+      networkVolumeId
+      locations
+      scalerType
+      scalerValue
+      templateId
+      type
+      userId
+      version
+      workersMax
+      workersMin
+      workersStandby
+      gpuCount
+      env {
+        key
+        value
+      }
+      createdAt
+      networkVolume {
+        id
+        dataCenterId
+      }
+    }
+  }
+}
+"""
diff --git a/runpod/cli/groups/project/functions.py b/runpod/cli/groups/project/functions.py
@@ -5,21 +5,59 @@
 import os
 import sys
 import uuid
+from datetime import datetime
 
 import tomlkit
 from tomlkit import document, comment, table, nl
 
-from runpod import __version__, get_pod, create_template, create_endpoint
+from runpod import __version__, get_pod, create_template, create_endpoint, update_endpoint_template
 from runpod.cli import BASE_DOCKER_IMAGE, GPU_TYPES, ENV_VARS
 from runpod.cli.utils.ssh_cmd import SSHConnection
-from .helpers import get_project_pod, copy_template_files, attempt_pod_launch, load_project_config
+from .helpers import (
+    get_project_pod, copy_template_files,
+    attempt_pod_launch, load_project_config, get_project_endpoint
+)
 from ...utils.rp_sync import sync_directory
 
 STARTER_TEMPLATES = os.path.join(os.path.dirname(__file__), 'starter_templates')
 
-# -------------------------------- New Project ------------------------------- #
+
+def _launch_dev_pod():
+    """ Launch a development pod. """
+    config = load_project_config()  # Load runpod.toml
+
+    print("Deploying development pod on RunPod...")
+
+    # Prepare the environment variables
+    environment_variables = {"RUNPOD_PROJECT_ID": config["project"]["uuid"]}
+    for variable in config['project'].get('env_vars', {}):
+        environment_variables[variable] = config['project']['env_vars'][variable]
+
+    # Prepare the GPU types
+    selected_gpu_types = config['project'].get('gpu_types', [])
+    if config['project'].get('gpu', None):
+        selected_gpu_types.append(config['project']['gpu'])
+
+    # Attempt to launch a pod with the given configuration
+    new_pod = attempt_pod_launch(config, environment_variables)
+    if new_pod is None:
+        print("Selected GPU types unavailable, try again later or use a different type.")
+        return None
+
+    print("Waiting for pod to come online... ", end="")
+    sys.stdout.flush()
+
+    # Wait for the pod to come online
+    while new_pod.get('desiredStatus', None) != 'RUNNING' or new_pod.get('runtime') is None:
+        new_pod = get_pod(new_pod['id'])
+
+    project_pod_id = new_pod['id']
+
+    print(f"Project {config['project']['name']} pod ({project_pod_id}) created.", end="\n\n")
+    return project_pod_id
 
 
+# -------------------------------- New Project ------------------------------- #
 def create_new_project(project_name, runpod_volume_id, cuda_version, python_version,  # pylint: disable=too-many-locals, too-many-arguments, too-many-statements
                        model_type=None, model_name=None, init_current_dir=False):
     """ Create a new project. """
@@ -124,63 +162,40 @@ def start_project():  # pylint: disable=too-many-locals, too-many-branches
 
     # Check if the project pod already exists, if not create it.
     if not project_pod_id:
+        project_pod_id = _launch_dev_pod()
 
-        print("Deploying development pod on RunPod...")
-
-        # Prepare the environment variables
-        environment_variables = {"RUNPOD_PROJECT_ID": config["project"]["uuid"]}
-        for variable in config['project'].get('env_vars', {}):
-            environment_variables[variable] = config['project']['env_vars'][variable]
-
-        # Prepare the GPU types
-        selected_gpu_types = config['project'].get('gpu_types', [])
-        if config['project'].get('gpu', None):
-            selected_gpu_types.append(config['project']['gpu'])
-
-        # Attempt to launch a pod with the given configuration
-        new_pod = attempt_pod_launch(config, environment_variables)
-        if new_pod is None:
-            print("Selected GPU types unavailable, try again later or use a different type.")
-            return
-
-        print("Waiting for pod to come online... ", end="")
-        sys.stdout.flush()
-
-        # Wait for the pod to come online
-        while new_pod.get('desiredStatus', None) != 'RUNNING' or new_pod.get('runtime') is None:
-            new_pod = get_pod(new_pod['id'])
-
-        project_pod_id = new_pod['id']
-
-        print(f"Project {config['project']['name']} pod ({project_pod_id}) created.", end="\n\n")
+    if project_pod_id is None:
+        return
 
     with SSHConnection(project_pod_id) as ssh_conn:
 
         project_path_uuid = f'{config["project"]["volume_mount_path"]}/{config["project"]["uuid"]}'
-        remote_project_path = os.path.join(project_path_uuid, config["project"]["name"])
+        project_path_uuid_dev = os.path.join(project_path_uuid, 'dev')
+        project_path_uuid_prod = os.path.join(project_path_uuid, 'prod')
+        remote_project_path = os.path.join(project_path_uuid_dev, config["project"]["name"])
 
         # Create the project folder on the pod
         print(f'Checking pod project folder: {remote_project_path} on pod {project_pod_id}')
-        ssh_conn.run_commands([f'mkdir -p {remote_project_path}'])
+        ssh_conn.run_commands([f'mkdir -p {remote_project_path} {project_path_uuid_prod}'])
 
         # Copy local files to the pod project folder
         print(f'Syncing files to pod {project_pod_id}')
-        ssh_conn.rsync(os.getcwd(), project_path_uuid)
+        ssh_conn.rsync(os.getcwd(), project_path_uuid_dev)
 
         # Create the virtual environment
-        venv_path = os.path.join(project_path_uuid, "venv")
+        venv_path = os.path.join(project_path_uuid_dev, "venv")
         print(f'Activating Python virtual environment: {venv_path} on pod {project_pod_id}')
         commands = [
             f'python{config["runtime"]["python_version"]} -m venv {venv_path}',
             f'source {venv_path}/bin/activate &&'
             f'cd {remote_project_path} &&'
             'python -m pip install --upgrade pip &&'
-            f'python -m pip install --requirement {config["runtime"]["requirements_path"]}'
+            f'python -m pip install -v --requirement {config["runtime"]["requirements_path"]}'
         ]
         ssh_conn.run_commands(commands)
 
         # Start the watcher and then start the API development server
-        sync_directory(ssh_conn, os.getcwd(), project_path_uuid)
+        sync_directory(ssh_conn, os.getcwd(), project_path_uuid_dev)
 
         project_name = config["project"]["name"]
         pip_req_path = os.path.join(remote_project_path, config['runtime']['requirements_path'])
@@ -217,14 +232,14 @@ def start_project():  # pylint: disable=too-many-locals, too-many-branches
 
             trap cleanup EXIT SIGINT
 
-            if source {project_path_uuid}/venv/bin/activate; then
+            if source {project_path_uuid_dev}/venv/bin/activate; then
                 echo -e "- Activated virtual environment."
             else
                 echo "Failed to activate virtual environment."
                 exit 1
             fi
 
-            if cd {project_path_uuid}/{project_name}; then
+            if cd {project_path_uuid_dev}/{project_name}; then
                 echo -e "- Changed to project directory."
             else
                 echo "Failed to change directory."
@@ -287,35 +302,74 @@ def start_project():  # pylint: disable=too-many-locals, too-many-branches
 # ------------------------------ Deploy Project ------------------------------ #
 def create_project_endpoint():
     """ Create a project endpoint.
+    - Move code in dev to prod folder
+    - TODO: git commit the diff from current state to new state
     - Create a serverless template for the project
     - Create a new endpoint using the template
     """
     config = load_project_config()
+    project_pod_id = get_project_pod(config['project']['uuid'])
+
+    # Check if the project pod already exists, if not create it.
+    if not project_pod_id:
+        project_pod_id = _launch_dev_pod()
+
+    if project_pod_id is None:
+        return None
+
+    with SSHConnection(project_pod_id) as ssh_conn:
+        project_path_uuid = f'{config["project"]["volume_mount_path"]}/{config["project"]["uuid"]}'
+        project_path_uuid_prod = os.path.join(project_path_uuid, 'prod')
+        remote_project_path = os.path.join(project_path_uuid_prod, config["project"]["name"])
+
+        # Copy local files to the pod project folder
+        ssh_conn.run_commands([f'mkdir -p {remote_project_path}'])
+        print(f'Syncing files to pod {project_pod_id} prod')
+        ssh_conn.rsync(os.getcwd(), project_path_uuid_prod)
+
+        # Create the virtual environment
+        venv_path = os.path.join(project_path_uuid_prod, 'venv')
+        print(f'Activating Python virtual environment: {venv_path} on pod {project_pod_id}')
+        commands = [
+            f'python{config["runtime"]["python_version"]} -m venv {venv_path}',
+            f'source {venv_path}/bin/activate &&'
+            f'cd {remote_project_path} &&'
+            'python -m pip install --upgrade pip &&'
+            f'python -m pip install -v --requirement {config["runtime"]["requirements_path"]}'
+        ]
+        ssh_conn.run_commands(commands)
+        ssh_conn.close()
 
     environment_variables = {}
     for variable in config['project']['env_vars']:
         environment_variables[variable] = config['project']['env_vars'][variable]
 
     # Construct the docker start command
-    docker_start_cmd_prefix = 'bash -c "'
-    activate_cmd = f'. /runpod-volume/{config["project"]["uuid"]}/venv/bin/activate'
-    python_cmd = f'python -u /runpod-volume/{config["project"]["uuid"]}/{config["project"]["name"]}/{config["runtime"]["handler_path"]}'  # pylint: disable=line-too-long
-    docker_start_cmd_suffix = '"'
-    docker_start_cmd = docker_start_cmd_prefix + activate_cmd + ' && ' + \
-        python_cmd + docker_start_cmd_suffix  # pylint: disable=line-too-long
+    activate_cmd = f'. /runpod-volume/{config["project"]["uuid"]}/prod/venv/bin/activate'
+    python_cmd = f'python -u /runpod-volume/{config["project"]["uuid"]}/prod/{config["project"]["name"]}/{config["runtime"]["handler_path"]}'  # pylint: disable=line-too-long
+    docker_start_cmd = 'bash -c "' + activate_cmd + ' && ' + python_cmd + '"'
 
     project_endpoint_template = create_template(
-        name=f'{config["project"]["name"]}-endpoint | {config["project"]["uuid"]}',
+        name=f'{config["project"]["name"]}-endpoint | {config["project"]["uuid"]} | {datetime.now()}',  # pylint: disable=line-too-long
         image_name=config['project']['base_image'],
         container_disk_in_gb=config['project']['container_disk_size_gb'],
         docker_start_cmd=docker_start_cmd,
         env=environment_variables, is_serverless=True
     )
 
-    deployed_endpoint = create_endpoint(
-        name=f'{config["project"]["name"]}-endpoint | {config["project"]["uuid"]}',
-        template_id=project_endpoint_template['id'],
-        network_volume_id=config['project']['storage_id'],
-    )
+    deployed_endpoint = get_project_endpoint(config['project']['uuid'])
+    if not deployed_endpoint:
+        deployed_endpoint = create_endpoint(
+            name=f'{config["project"]["name"]}-endpoint | {config["project"]["uuid"]}',
+            template_id=project_endpoint_template['id'],
+            network_volume_id=config['project']['storage_id'],
+        )
+    else:
+        deployed_endpoint = update_endpoint_template(
+            endpoint_id=deployed_endpoint['id'],
+            template_id=project_endpoint_template['id'],
+        )
+
+    # does user want to tear down and recreate workers immediately?
 
     return deployed_endpoint['id']