api/inference/v1alpha1/backendruntime_types.go (2 additions, 1 deletion)

@@ -35,7 +35,8 @@ type BackendRuntimeArg struct {
 // BackendRuntimeSpec defines the desired state of BackendRuntime
 type BackendRuntimeSpec struct {
 	// Commands represents the default command of the backendRuntime.
-	Commands []string `json:"commands"`
+	// +optional
+	Commands []string `json:"commands,omitempty"`
 	// Image represents the default image registry of the backendRuntime.
 	// It will work together with version to make up a real image.
 	Image string `json:"image"`
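Together with the CRD change below, which drops `commands` from the `required` list, marking the field `+optional` with `omitempty` means a BackendRuntime can be created without a default command. A minimal sketch of what `omitempty` does during serialization: the struct is trimmed to the two fields visible in this hunk, and the image and command values are illustrative only, not taken from this PR.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed-down sketch of BackendRuntimeSpec, keeping only the two fields
// visible in this hunk; the real type has additional required fields.
type BackendRuntimeSpec struct {
	// Commands represents the default command of the backendRuntime.
	// +optional
	Commands []string `json:"commands,omitempty"`
	// Image represents the default image registry of the backendRuntime.
	Image string `json:"image"`
}

func main() {
	// With `omitempty`, a nil Commands slice is dropped from the marshaled
	// object, so a manifest no longer has to set it (illustrative image name).
	withoutCommands, _ := json.Marshal(BackendRuntimeSpec{Image: "lmsysorg/sglang"})
	fmt.Println(string(withoutCommands)) // {"image":"lmsysorg/sglang"}

	// When commands are provided, they are serialized exactly as before.
	withCommands, _ := json.Marshal(BackendRuntimeSpec{
		Commands: []string{"python3", "-m", "sglang.launch_server"},
		Image:    "lmsysorg/sglang",
	})
	fmt.Println(string(withCommands))
}
```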
config/crd/bases/inference.llmaz.io_backendruntimes.yaml (1 deletion)

@@ -231,7 +231,6 @@ spec:
 It will be appended to the image as a tag.
 type: string
 required:
-- commands
 - image
 - resources
 - version
docs/examples/sglang/playground.yaml (2 additions, 2 deletions)

@@ -1,10 +1,10 @@
 apiVersion: inference.llmaz.io/v1alpha1
 kind: Playground
 metadata:
-  name: qwen2-05b
+  name: qwen2-0--5b
 spec:
   replicas: 1
   modelClaim:
-    modelName: qwen2-05b
+    modelName: qwen2-0--5b
   backendRuntimeConfig:
     name: sglang
docs/support-backends.md (4 additions, 4 deletions)

@@ -1,13 +1,13 @@
 # All Kinds of Supported Inference Backends
 
-## vLLM
+## llama.cpp
 
-[vLLM](https://github.com/vllm-project/vllm) is a high-throughput and memory-efficient inference and serving engine for LLMs
+[llama.cpp](https://github.com/ggerganov/llama.cpp) is to enable LLM inference with minimal setup and state-of-the-art performance on a wide variety of hardware - locally and in the cloud.
 
 ## SGLang
 
 [SGLang](https://github.com/sgl-project/sglang) is yet another fast serving framework for large language models and vision language models.
 
-## llama.cpp
+## vLLM
 
-[llama.cpp](https://github.com/ggerganov/llama.cpp) is to enable LLM inference with minimal setup and state-of-the-art performance on a wide variety of hardware - locally and in the cloud.
+[vLLM](https://github.com/vllm-project/vllm) is a high-throughput and memory-efficient inference and serving engine for LLMs