Skip to content

Commit 1766292

Browse files
committed
feat: config hot-reload - fix merge conflicts
2 parents fa23166 + 5c5a5da commit 1766292

File tree

9 files changed

+294
-80
lines changed

9 files changed

+294
-80
lines changed

.coderabbit.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
2+
language: "en-US"
3+
early_access: false
4+
reviews:
5+
profile: "chill"
6+
request_changes_workflow: false
7+
high_level_summary: true
8+
poem: false
9+
review_status: true
10+
collapse_walkthrough: false
11+
auto_review:
12+
enabled: true
13+
drafts: false
14+
chat:
15+
auto_reply: true

.github/workflows/containers.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ jobs:
1515
runs-on: ubuntu-latest
1616
strategy:
1717
matrix:
18-
platform: [intel, cuda, vulkan, cpu, musa]
18+
#platform: [intel, cuda, vulkan, cpu, musa]
19+
platform: [cuda, vulkan, cpu, musa]
1920
fail-fast: false
2021
steps:
2122
- name: Checkout code

README.md

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,14 @@ healthCheckTimeout: 60
7070
# Valid log levels: debug, info (default), warn, error
7171
logLevel: info
7272

73+
# Automatic Port Values
74+
# use ${PORT} in model.cmd and model.proxy to use an automatic port number
75+
# when you use ${PORT} you can omit a custom model.proxy value, as it will
76+
# default to http://localhost:${PORT}
77+
78+
# override the default port (5800) for automatic port values
79+
startPort: 10001
80+
7381
# define valid model values and the upstream server start
7482
models:
7583
"llama":
@@ -83,6 +91,7 @@ models:
8391
- "CUDA_VISIBLE_DEVICES=0"
8492

8593
# where to reach the server started by cmd, make sure the ports match
94+
# can be omitted if you use an automatic ${PORT} in cmd
8695
proxy: http://127.0.0.1:8999
8796

8897
# aliases names to use this model for
@@ -109,14 +118,14 @@ models:
109118
# but they can still be requested as normal
110119
"qwen-unlisted":
111120
unlisted: true
112-
cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
121+
cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
113122

114123
# Docker Support (v26.1.4+ required!)
115124
"docker-llama":
116-
proxy: "http://127.0.0.1:9790"
125+
proxy: "http://127.0.0.1:${PORT}"
117126
cmd: >
118127
docker run --name dockertest
119-
--init --rm -p 9790:8080 -v /mnt/nvme/models:/models
128+
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
120129
ghcr.io/ggerganov/llama.cpp:server
121130
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
122131
@@ -180,11 +189,6 @@ groups:
180189
- [Speculative Decoding](examples/speculative-decoding/README.md) - using a small draft model can increase inference speeds from 20% to 40%. This example includes configurations for Qwen2.5-Coder-32B (2.5x increase) and Llama-3.1-70B (1.4x increase) in the best cases.
181190
- [Optimizing Code Generation](examples/benchmark-snakegame/README.md) - find the optimal settings for your machine. This example demonstrates defining multiple configurations and testing which one is fastest.
182191
- [Restart on Config Change](examples/restart-on-config-change/README.md) - automatically restart llama-swap when trying out different configurations.
183-
184-
## Configuration
185-
186-
llama-s
187-
188192
</details>
189193

190194
## Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))

misc/simple-responder/simple-responder.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,17 @@ func main() {
3333

3434
// Set up the handler function using the provided response message
3535
r.POST("/v1/chat/completions", func(c *gin.Context) {
36-
c.Header("Content-Type", "text/plain")
36+
c.Header("Content-Type", "application/json")
3737

3838
// add a wait to simulate a slow query
3939
if wait, err := time.ParseDuration(c.Query("wait")); err == nil {
4040
time.Sleep(wait)
4141
}
4242

43-
c.String(200, *responseMessage)
43+
c.JSON(http.StatusOK, gin.H{
44+
"responseMessage": *responseMessage,
45+
"h_content_length": c.Request.Header.Get("Content-Length"),
46+
})
4447
})
4548

4649
// for issue #62 to check model name strips profile slug
@@ -63,8 +66,11 @@ func main() {
6366
})
6467

6568
r.POST("/v1/completions", func(c *gin.Context) {
66-
c.Header("Content-Type", "text/plain")
67-
c.String(200, *responseMessage)
69+
c.Header("Content-Type", "application/json")
70+
c.JSON(http.StatusOK, gin.H{
71+
"responseMessage": *responseMessage,
72+
})
73+
6874
})
6975

7076
// issue #41
@@ -104,6 +110,10 @@ func main() {
104110
c.JSON(http.StatusOK, gin.H{
105111
"text": fmt.Sprintf("The length of the file is %d bytes", fileSize),
106112
"model": model,
113+
114+
// expose some header values for testing
115+
"h_content_type": c.GetHeader("Content-Type"),
116+
"h_content_length": c.GetHeader("Content-Length"),
107117
})
108118
})
109119

proxy/config.go

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ package proxy
22

33
import (
44
"fmt"
5+
"io"
56
"os"
67
"sort"
8+
"strconv"
79
"strings"
810

911
"github.com/google/shlex"
@@ -62,6 +64,9 @@ type Config struct {
6264

6365
// map aliases to actual model IDs
6466
aliases map[string]string
67+
68+
// automatic port assignments
69+
StartPort int `yaml:"startPort"`
6570
}
6671

6772
func (c *Config) RealModelName(search string) (string, bool) {
@@ -83,7 +88,16 @@ func (c *Config) FindConfig(modelName string) (ModelConfig, string, bool) {
8388
}
8489

8590
func LoadConfig(path string) (Config, error) {
86-
data, err := os.ReadFile(path)
91+
file, err := os.Open(path)
92+
if err != nil {
93+
return Config{}, err
94+
}
95+
defer file.Close()
96+
return LoadConfigFromReader(file)
97+
}
98+
99+
func LoadConfigFromReader(r io.Reader) (Config, error) {
100+
data, err := io.ReadAll(r)
87101
if err != nil {
88102
return Config{}, err
89103
}
@@ -98,14 +112,50 @@ func LoadConfig(path string) (Config, error) {
98112
config.HealthCheckTimeout = 15
99113
}
100114

115+
// set default port ranges
116+
if config.StartPort == 0 {
117+
// default to 5800
118+
config.StartPort = 5800
119+
} else if config.StartPort < 1 {
120+
return Config{}, fmt.Errorf("startPort must be greater than 1")
121+
}
122+
101123
// Populate the aliases map
102124
config.aliases = make(map[string]string)
103125
for modelName, modelConfig := range config.Models {
104126
for _, alias := range modelConfig.Aliases {
127+
if _, found := config.aliases[alias]; found {
128+
return Config{}, fmt.Errorf("duplicate alias %s found in model: %s", alias, modelName)
129+
}
105130
config.aliases[alias] = modelName
106131
}
107132
}
108133

134+
// iterate over the models and replace any ${PORT} with the next available port
135+
// Get and sort all model IDs first, makes testing more consistent
136+
modelIds := make([]string, 0, len(config.Models))
137+
for modelId := range config.Models {
138+
modelIds = append(modelIds, modelId)
139+
}
140+
sort.Strings(modelIds) // This guarantees stable iteration order
141+
142+
// iterate over the sorted models
143+
nextPort := config.StartPort
144+
for _, modelId := range modelIds {
145+
modelConfig := config.Models[modelId]
146+
if strings.Contains(modelConfig.Cmd, "${PORT}") {
147+
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, "${PORT}", strconv.Itoa(nextPort))
148+
if modelConfig.Proxy == "" {
149+
modelConfig.Proxy = fmt.Sprintf("http://localhost:%d", nextPort)
150+
} else {
151+
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, "${PORT}", strconv.Itoa(nextPort))
152+
}
153+
nextPort++
154+
config.Models[modelId] = modelConfig
155+
} else if modelConfig.Proxy == "" {
156+
return Config{}, fmt.Errorf("model %s requires a proxy value when not using automatic ${PORT}", modelId)
157+
}
158+
}
109159
config = AddDefaultGroupToConfig(config)
110160
// check that members are all unique in the groups
111161
memberUsage := make(map[string]string) // maps member to group it appears in

proxy/config_test.go

Lines changed: 103 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package proxy
33
import (
44
"os"
55
"path/filepath"
6+
"strings"
67
"testing"
78

89
"github.com/stretchr/testify/assert"
@@ -43,6 +44,7 @@ models:
4344
checkEndpoint: "/"
4445
model4:
4546
cmd: path/to/cmd --arg1 one
47+
proxy: "http://localhost:8082"
4648
checkEndpoint: "/"
4749
4850
healthCheckTimeout: 15
@@ -73,6 +75,7 @@ groups:
7375
}
7476

7577
expected := Config{
78+
StartPort: 5800,
7679
Models: map[string]ModelConfig{
7780
"model1": {
7881
Cmd: "path/to/cmd --arg1 one",
@@ -97,6 +100,7 @@ groups:
97100
},
98101
"model4": {
99102
Cmd: "path/to/cmd --arg1 one",
103+
Proxy: "http://localhost:8082",
100104
CheckEndpoint: "/",
101105
},
102106
},
@@ -138,14 +142,6 @@ groups:
138142
}
139143

140144
func TestConfig_GroupMemberIsUnique(t *testing.T) {
141-
// Create a temporary YAML file for testing
142-
tempDir, err := os.MkdirTemp("", "test-config")
143-
if err != nil {
144-
t.Fatalf("Failed to create temporary directory: %v", err)
145-
}
146-
defer os.RemoveAll(tempDir)
147-
148-
tempFile := filepath.Join(tempDir, "config.yaml")
149145
content := `
150146
models:
151147
model1:
@@ -171,15 +167,35 @@ groups:
171167
exclusive: false
172168
members: ["model2"]
173169
`
170+
// Load the config and verify
171+
_, err := LoadConfigFromReader(strings.NewReader(content))
174172

175-
if err := os.WriteFile(tempFile, []byte(content), 0644); err != nil {
176-
t.Fatalf("Failed to write temporary file: %v", err)
177-
}
173+
// use a Contains check, as map iteration order is not guaranteed
174+
assert.Contains(t, err.Error(), "model member model2 is used in multiple groups:")
175+
}
178176

177+
func TestConfig_ModelAliasesAreUnique(t *testing.T) {
178+
content := `
179+
models:
180+
model1:
181+
cmd: path/to/cmd --arg1 one
182+
proxy: "http://localhost:8080"
183+
aliases:
184+
- m1
185+
model2:
186+
cmd: path/to/cmd --arg1 one
187+
proxy: "http://localhost:8081"
188+
checkEndpoint: "/"
189+
aliases:
190+
- m1
191+
- m2
192+
`
179193
// Load the config and verify
180-
_, err = LoadConfig(tempFile)
181-
assert.NotNil(t, err)
194+
_, err := LoadConfigFromReader(strings.NewReader(content))
182195

196+
// this is a Contains because it could be `model1` or `model2` depending on
197+
// the map iteration order Go chose
198+
assert.Contains(t, err.Error(), "duplicate alias m1 found in model: model")
183199
}
184200

185201
func TestConfig_ModelConfigSanitizedCommand(t *testing.T) {
@@ -269,3 +285,77 @@ func TestConfig_SanitizeCommand(t *testing.T) {
269285
assert.Error(t, err)
270286
assert.Nil(t, args)
271287
}
288+
289+
func TestConfig_AutomaticPortAssignments(t *testing.T) {
290+
291+
t.Run("Default Port Ranges", func(t *testing.T) {
292+
content := ``
293+
config, err := LoadConfigFromReader(strings.NewReader(content))
294+
if !assert.NoError(t, err) {
295+
t.Fatalf("Failed to load config: %v", err)
296+
}
297+
298+
assert.Equal(t, 5800, config.StartPort)
299+
})
300+
t.Run("User specific port ranges", func(t *testing.T) {
301+
content := `startPort: 1000`
302+
config, err := LoadConfigFromReader(strings.NewReader(content))
303+
if !assert.NoError(t, err) {
304+
t.Fatalf("Failed to load config: %v", err)
305+
}
306+
307+
assert.Equal(t, 1000, config.StartPort)
308+
})
309+
310+
t.Run("Invalid start port", func(t *testing.T) {
311+
content := `startPort: abcd`
312+
_, err := LoadConfigFromReader(strings.NewReader(content))
313+
assert.NotNil(t, err)
314+
})
315+
316+
t.Run("start port must be greater than 1", func(t *testing.T) {
317+
content := `startPort: -99`
318+
_, err := LoadConfigFromReader(strings.NewReader(content))
319+
assert.NotNil(t, err)
320+
})
321+
322+
t.Run("Automatic port assignments", func(t *testing.T) {
323+
content := `
324+
startPort: 5800
325+
models:
326+
model1:
327+
cmd: svr --port ${PORT}
328+
model2:
329+
cmd: svr --port ${PORT}
330+
proxy: "http://172.11.22.33:${PORT}"
331+
model3:
332+
cmd: svr --port 1999
333+
proxy: "http://1.2.3.4:1999"
334+
`
335+
config, err := LoadConfigFromReader(strings.NewReader(content))
336+
if !assert.NoError(t, err) {
337+
t.Fatalf("Failed to load config: %v", err)
338+
}
339+
340+
assert.Equal(t, 5800, config.StartPort)
341+
assert.Equal(t, "svr --port 5800", config.Models["model1"].Cmd)
342+
assert.Equal(t, "http://localhost:5800", config.Models["model1"].Proxy)
343+
344+
assert.Equal(t, "svr --port 5801", config.Models["model2"].Cmd)
345+
assert.Equal(t, "http://172.11.22.33:5801", config.Models["model2"].Proxy)
346+
347+
assert.Equal(t, "svr --port 1999", config.Models["model3"].Cmd)
348+
assert.Equal(t, "http://1.2.3.4:1999", config.Models["model3"].Proxy)
349+
350+
})
351+
352+
t.Run("Proxy value required if no ${PORT} in cmd", func(t *testing.T) {
353+
content := `
354+
models:
355+
model1:
356+
cmd: svr --port 111
357+
`
358+
_, err := LoadConfigFromReader(strings.NewReader(content))
359+
assert.Equal(t, "model model1 requires a proxy value when not using automatic ${PORT}", err.Error())
360+
})
361+
}

0 commit comments

Comments
 (0)