
Chase example contributions #103


Open · wants to merge 2 commits into base: main
Binary file added examples/.DS_Store
Binary file not shown.
Binary file added examples/sdk/.DS_Store
Binary file not shown.
@@ -0,0 +1,36 @@
import centml
from centml.sdk.api import get_centml_client
from centml.sdk import DeploymentType, CreateInferenceDeploymentRequest, UserVaultType



def main():
    with get_centml_client() as cclient:
        token = cclient.get_user_vault(UserVaultType.BEARER_TOKENS)
        request = CreateInferenceDeploymentRequest(
            name="vllm",
            cluster_id=1000,
            hardware_instance_id=1000,
            image_url="vllm",
            port=8080,
            min_scale=1,
            max_scale=1,
            endpoint_bearer_token=token["general-inference"],  # token must exist in vault
        )
        response = cclient.create_inference(request)
        print("Create deployment response: ", response)

        ### Get deployment details
        deployment = cclient.get_inference(response.id)
        print("Deployment details: ", deployment)

        '''
        ### Pause the deployment
        cclient.pause(deployment.id)

        ### Delete the deployment
        cclient.delete(deployment.id)
        '''

if __name__ == "__main__":
    main()
@@ -8,14 +8,14 @@ def main():
        certs = cclient.get_user_vault(UserVaultType.CERTIFICATES)

        request = CreateInferenceDeploymentRequest(
-           name="nginx",
+           name="nginx-cert-private",
            cluster_id=1000,
            hardware_instance_id=1000,
            image_url="nginxinc/nginx-unprivileged",
            port=8080,
            min_scale=1,
            max_scale=1,
-           endpoint_certificate_authority=certs["my_cert"],
+           endpoint_certificate_authority=certs["my_cert"],  # cert must exist in vault
        )
        response = cclient.create_inference(request)
        print("Create deployment response: ", response)
34 changes: 34 additions & 0 deletions examples/sdk/general-inference/create_public_inference.py
@@ -0,0 +1,34 @@
import centml
from centml.sdk.api import get_centml_client
from centml.sdk import DeploymentType, CreateInferenceDeploymentRequest, UserVaultType


def main():
    with get_centml_client() as cclient:

        request = CreateInferenceDeploymentRequest(
            name="nginx",
            cluster_id=1000,
            hardware_instance_id=1000,
            image_url="nginxinc/nginx-unprivileged",
            port=8080,
            min_scale=1,
            max_scale=1,
        )
        response = cclient.create_inference(request)
        print("Create deployment response: ", response)

        ### Get deployment details
        deployment = cclient.get_inference(response.id)
        print("Deployment details: ", deployment)

        '''
        ### Pause the deployment
        cclient.pause(deployment.id)

        ### Delete the deployment
        cclient.delete(deployment.id)
        '''

if __name__ == "__main__":
    main()
@@ -0,0 +1,58 @@
# 🚀 CentML Endpoint Creator

This script creates a CentML CServe V2 deployment for a given model, using either the fastest available hardware or a default hardware configuration. This is meant to be an example to help users work with the CentML SDK.

---

## 📄 Script: `create_endpoint.py`

### 🔧 What it does

- Fetches the fastest available CServe recipe for the specified model
- Builds a deployment request with appropriate cluster and hardware info
- Optionally modifies recipe properties (e.g. `max_num_seqs`)
- Submits the deployment via the CentML SDK
- Prints the deployment response and metadata

---

## 🧰 Requirements

- Python 3.8+
- [CentML Python SDK](https://pypi.org/project/centml/)

Install:

```bash
pip install centml
```
## Default behavior

- Uses the fastest recipe from `get_cserve_recipe(...)`
- Falls back to the hardcoded cluster ID `1001` in `get_default_cserve_config(...)` if needed

You can adjust the model and deployment name here:
```python
qwen_config = get_fastest_cserve_config(
cclient,
name="qwen-fastest",
model="Qwen/Qwen2-VL-7B-Instruct"
)
```
Or use the default config instead:

```python
qwen_config = get_default_cserve_config(
cclient,
name="qwen-default",
model="Qwen/Qwen2-VL-7B-Instruct"
)
```
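
For orientation, here is a minimal sketch of how these pieces might be wired together and submitted. The `create_cserve` call and the `recipe.additional_properties` attribute path are assumptions (they are not shown in this README), so defer to `create_endpoint.py` and the SDK for the actual names:

```python
from centml.sdk.api import get_centml_client

# get_fastest_cserve_config / get_default_cserve_config are the helpers
# defined in create_endpoint.py (see the snippets above)


def main():
    with get_centml_client() as cclient:
        # Pick either the fastest-hardware config or the default one
        qwen_config = get_fastest_cserve_config(
            cclient,
            name="qwen-fastest",
            model="Qwen/Qwen2-VL-7B-Instruct",
        )

        # Optionally tweak recipe properties before deploying; the attribute
        # path below is an assumption, not taken from this README
        qwen_config.recipe.additional_properties["max_num_seqs"] = 256

        # Submit the deployment. `create_cserve` is an assumed name for the
        # SDK's CServe V2 creation call; use whatever create_endpoint.py calls.
        response = cclient.create_cserve(qwen_config)
        print("Create deployment response: ", response)


if __name__ == "__main__":
    main()
```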
## 🧪 Running the Script

`python3 create_endpoint.py`

## 📬 Questions?

Reach out to the CentML team or maintainers if you encounter unexpected recipe/hardware mismatches.
23 changes: 23 additions & 0 deletions examples/sdk/llm-endpoints/centml-tools/Makefile
@@ -0,0 +1,23 @@
CONFIG ?= test_config.json
NAME ?=

PYTHON := /opt/homebrew/bin/python3.11


.PHONY: help deploy delete inspect

help:
	@echo "Available commands:"
	@echo " make deploy CONFIG=<config_file.json>"
	@echo " make delete CONFIG=<config_file.json>"
	@echo " make inspect NAME=<model_name> or CONFIG=<config_file.json>"

deploy:
	@echo "Using Python: $(PYTHON)"
	$(PYTHON) scripts/deploy_model.py $(CONFIG)

delete:
	$(PYTHON) scripts/delete_deployment.py $(CONFIG)

inspect:
	$(PYTHON) scripts/inspect_model.py $(CONFIG)
22 changes: 22 additions & 0 deletions examples/sdk/llm-endpoints/centml-tools/README.MD
@@ -0,0 +1,22 @@
# 🧠 CentML Deployment Tools

This repository provides a simple Makefile interface to deploy, inspect, and delete model deployments using the CentML SDK.

## 📦 Prerequisites

- Python 3.11 (or update the `PYTHON` path in the Makefile)
- The CentML Python SDK installed:
```bash
pip install centml
```

## 🛠️ Usage

Run any of the following commands from the project root:

### 🚀 Deploy a Model
`make deploy CONFIG=test_config.json`

Deploys a model based on the configuration in `test_config.json`.

### 🔍 Inspect a Model

`make inspect CONFIG="model_name"`

Displays information about the deployment with name `model_name`.

Or, if you have the config file:

`make inspect CONFIG=test_config.json`
### Register a Deployment
WIP
148 changes: 148 additions & 0 deletions examples/sdk/llm-endpoints/centml-tools/scripts/README.md
@@ -0,0 +1,148 @@
# 🧠 CentML Deployment Toolkit

This directory contains Python utilities to manage CentML model deployments, query available hardware, and clean up resources.

---

## 📦 Tools Overview

| Script | Description |
|-------------------------|-----------------------------------------------------------------------------|
| `deploy_model.py` | Creates or updates a model deployment from a config JSON |
| `delete_deployment.py` | Deletes a deployment by name using a config JSON |
| `inspect_model.py` | Lists all available hardware and deployment recipes for a given model |
| `create_endpoint.py` | Fetches the fastest CServe recipe for the specified model and deploys it |


## 🚀 Deployment Script

### 📄 File: `deploy_model.py`

This script deploys a model to CentML using the CServe V2 API. It will:

1. Load a JSON configuration file.
2. Validate cluster and hardware instance availability.
3. Check if the deployment already exists:
- If it does, it updates it.
- If it doesn't, it creates a new deployment.

### ✅ JSON Config Example:

```json
{
  "model": "meta-llama/Llama-3.2-3B-Instruct",
  "deployment_name": "sample",
  "hardware_instance_id": 1086,
  "cluster_id": 1001,
  "min_scale": 1,
  "max_scale": 1,
  "recipe": {
    "model": "meta-llama/Llama-3.2-3B-Instruct",
    "is_embedding_model": false,
    "additional_properties": {
      "tokenizer": "meta-llama/Llama-3.2-3B-Instruct",
      "dtype": "auto",
      "tensor_parallel_size": 1
    }
  }
}
```


### Usage

`python3 deploy_model.py <config_file.json>`
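
A rough sketch of that flow, with assumptions flagged inline (the hardware-listing call and the create/update step are placeholders; `deploy_model.py` is authoritative):

```python
import json
import sys

from centml.sdk.api import get_centml_client


def main(config_path):
    # 1. Load the JSON configuration file (same shape as the example above)
    with open(config_path) as f:
        cfg = json.load(f)

    with get_centml_client() as cclient:
        # 2. Validate cluster / hardware availability.
        #    `get_hardware_instances()` is an assumed listing call.
        hardware = cclient.get_hardware_instances()
        if not any(h.id == cfg["hardware_instance_id"] for h in hardware):
            raise SystemExit("Requested hardware_instance_id is not available")

        # 3. Check for an existing deployment with cfg["deployment_name"],
        #    then update it or create a new CServe V2 deployment. The concrete
        #    request type and create/update calls live in deploy_model.py.
        ...


if __name__ == "__main__":
    main(sys.argv[1])
```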

## Deletion Script

### 📄 File: `delete_deployment.py`
This script deletes an existing deployment if one with the provided `deployment_name` exists.

It does not raise an error if the deployment isn't found; it exits cleanly. It uses the same JSON config as `deploy_model.py`.


### Usage
`python3 delete_deployment.py <config_file.json>`
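
A minimal sketch of name-based deletion, assuming a `get_deployments()` listing method on the client; the `delete(id)` call is the same one used in the inference examples in this PR:

```python
import json
import sys

from centml.sdk.api import get_centml_client


def main(config_path):
    with open(config_path) as f:
        name = json.load(f)["deployment_name"]

    with get_centml_client() as cclient:
        # `get_deployments()` is an assumed listing call; swap in the real one
        match = next((d for d in cclient.get_deployments() if d.name == name), None)
        if match is None:
            # Exit cleanly when nothing matches, mirroring the script's behavior
            print(f"No deployment named {name!r}; nothing to delete")
            return

        cclient.delete(match.id)
        print(f"Deleted deployment {name!r} (id={match.id})")


if __name__ == "__main__":
    main(sys.argv[1])
```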


## 🔍 Inspect Model Script
### 📄 File: `inspect_model.py`
Inspects available deployment recipes and hardware for a specific model.

### Usage
`python3 inspect_model.py meta-llama/Llama-3.2-3B-Instruct`
The script will:

- List all recipe variants (e.g., fastest, cheapest)
- Print detailed hardware specs for each variant
- Display all available hardware options
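
Sketched out, the inspection flow could look like the following; `get_cserve_recipe(...)` is the helper already referenced in the `create_endpoint.py` notes below, while `get_hardware_instances()` is an assumed listing method, so check `inspect_model.py` for the real calls:

```python
import sys

from centml.sdk.api import get_centml_client


def main(model):
    with get_centml_client() as cclient:
        # Recipe variants for the model (fastest, cheapest, ...); the exact
        # return shape is an assumption (inspect_model.py is authoritative)
        for variant in cclient.get_cserve_recipe(model=model):
            print(variant)

        # All hardware options visible to the account (assumed listing call)
        for hw in cclient.get_hardware_instances():
            print(hw)


if __name__ == "__main__":
    main(sys.argv[1])
```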


## 📄 File: `create_endpoint.py`

### 🔧 What it does

- Fetches the fastest available CServe recipe for the specified model
- Builds a deployment request with appropriate cluster and hardware info
- Optionally modifies recipe properties (e.g. `max_num_seqs`)
- Submits the deployment via the CentML SDK
- Prints the deployment response and metadata

---

## 🧰 Requirements

- Python 3.8+
- [CentML Python SDK](https://pypi.org/project/centml/)

Install:

```bash
pip install centml
```
## Default behavior

- Uses the fastest recipe from `get_cserve_recipe(...)`
- Falls back to the hardcoded cluster ID `1001` in `get_default_cserve_config(...)` if needed

You can adjust the model and deployment name here:
```python
qwen_config = get_fastest_cserve_config(
cclient,
name="qwen-fastest",
model="Qwen/Qwen2-VL-7B-Instruct"
)
```
Or use the default config instead:

```python
qwen_config = get_default_cserve_config(
cclient,
name="qwen-default",
model="Qwen/Qwen2-VL-7B-Instruct"
)
```
## 🧪 Running the Script

`python3 create_endpoint.py`





## 🧰 Prerequisites

* Python 3.8+
* CentML Python SDK
* Valid CentML credentials (e.g., via environment or local config)

### Install Dependencies
`pip install centml`



## 📬 Questions?

Reach out to the CentML team or maintainers if you encounter unexpected recipe/hardware mismatches.