Skip to content

Commit 1766292

Browse files
committed
feat: config hot-reload - fix merge conflicts
2 parents fa23166 + 5c5a5da commit 1766292

File tree

9 files changed

+294
-80
lines changed

9 files changed

+294
-80
lines changed

.coderabbit.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
2+
language: "en-US"
3+
early_access: false
4+
reviews:
5+
profile: "chill"
6+
request_changes_workflow: false
7+
high_level_summary: true
8+
poem: false
9+
review_status: true
10+
collapse_walkthrough: false
11+
auto_review:
12+
enabled: true
13+
drafts: false
14+
chat:
15+
auto_reply: true

.github/workflows/containers.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ jobs:
1515
runs-on: ubuntu-latest
1616
strategy:
1717
matrix:
18-
platform: [intel, cuda, vulkan, cpu, musa]
18+
#platform: [intel, cuda, vulkan, cpu, musa]
19+
platform: [cuda, vulkan, cpu, musa]
1920
fail-fast: false
2021
steps:
2122
- name: Checkout code

README.md

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,14 @@ healthCheckTimeout: 60
7070
# Valid log levels: debug, info (default), warn, error
7171
logLevel: info
7272

73+
# Automatic Port Values
74+
# use ${PORT} in model.cmd and model.proxy to use an automatic port number
75+
# when you use ${PORT} you can omit a custom model.proxy value, as it will
76+
# default to http://localhost:${PORT}
77+
78+
# override the default port (5800) for automatic port values
79+
startPort: 10001
80+
7381
# define valid model values and the upstream server start
7482
models:
7583
"llama":
@@ -83,6 +91,7 @@ models:
8391
- "CUDA_VISIBLE_DEVICES=0"
8492

8593
# where to reach the server started by cmd, make sure the ports match
94+
# can be omitted if you use an automatic ${PORT} in cmd
8695
proxy: http://127.0.0.1:8999
8796

8897
# aliases names to use this model for
@@ -109,14 +118,14 @@ models:
109118
# but they can still be requested as normal
110119
"qwen-unlisted":
111120
unlisted: true
112-
cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
121+
cmd: llama-server --port ${PORT} -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
113122

114123
# Docker Support (v26.1.4+ required!)
115124
"docker-llama":
116-
proxy: "http://127.0.0.1:9790"
125+
proxy: "http://127.0.0.1:${PORT}"
117126
cmd: >
118127
docker run --name dockertest
119-
--init --rm -p 9790:8080 -v /mnt/nvme/models:/models
128+
--init --rm -p ${PORT}:8080 -v /mnt/nvme/models:/models
120129
ghcr.io/ggerganov/llama.cpp:server
121130
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
122131
@@ -180,11 +189,6 @@ groups:
180189
- [Speculative Decoding](examples/speculative-decoding/README.md) - using a small draft model can increase inference speeds from 20% to 40%. This example includes configurations for Qwen2.5-Coder-32B (2.5x increase) and Llama-3.1-70B (1.4x increase) in the best cases.
181190
- [Optimizing Code Generation](examples/benchmark-snakegame/README.md) - find the optimal settings for your machine. This example demonstrates defining multiple configurations and testing which one is fastest.
182191
- [Restart on Config Change](examples/restart-on-config-change/README.md) - automatically restart llama-swap when trying out different configurations.
183-
184-
## Configuration
185-
186-
llama-s
187-
188192
</details>
189193

190194
## Docker Install ([download images](https://github.com/mostlygeek/llama-swap/pkgs/container/llama-swap))

misc/simple-responder/simple-responder.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,17 @@ func main() {
3333

3434
// Set up the handler function using the provided response message
3535
r.POST("/v1/chat/completions", func(c *gin.Context) {
36-
c.Header("Content-Type", "text/plain")
36+
c.Header("Content-Type", "application/json")
3737

3838
// add a wait to simulate a slow query
3939
if wait, err := time.ParseDuration(c.Query("wait")); err == nil {
4040
time.Sleep(wait)
4141
}
4242

43-
c.String(200, *responseMessage)
43+
c.JSON(http.StatusOK, gin.H{
44+
"responseMessage": *responseMessage,
45+
"h_content_length": c.Request.Header.Get("Content-Length"),
46+
})
4447
})
4548

4649
// for issue #62 to check model name strips profile slug
@@ -63,8 +66,11 @@ func main() {
6366
})
6467

6568
r.POST("/v1/completions", func(c *gin.Context) {
66-
c.Header("Content-Type", "text/plain")
67-
c.String(200, *responseMessage)
69+
c.Header("Content-Type", "application/json")
70+
c.JSON(http.StatusOK, gin.H{
71+
"responseMessage": *responseMessage,
72+
})
73+
6874
})
6975

7076
// issue #41
@@ -104,6 +110,10 @@ func main() {
104110
c.JSON(http.StatusOK, gin.H{
105111
"text": fmt.Sprintf("The length of the file is %d bytes", fileSize),
106112
"model": model,
113+
114+
// expose some header values for testing
115+
"h_content_type": c.GetHeader("Content-Type"),
116+
"h_content_length": c.GetHeader("Content-Length"),
107117
})
108118
})
109119

proxy/config.go

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ package proxy
22

33
import (
44
"fmt"
5+
"io"
56
"os"
67
"sort"
8+
"strconv"
79
"strings"
810

911
"github.com/google/shlex"
@@ -62,6 +64,9 @@ type Config struct {
6264

6365
// map aliases to actual model IDs
6466
aliases map[string]string
67+
68+
// automatic port assignments
69+
StartPort int `yaml:"startPort"`
6570
}
6671

6772
func (c *Config) RealModelName(search string) (string, bool) {
@@ -83,7 +88,16 @@ func (c *Config) FindConfig(modelName string) (ModelConfig, string, bool) {
8388
}
8489

8590
func LoadConfig(path string) (Config, error) {
86-
data, err := os.ReadFile(path)
91+
file, err := os.Open(path)
92+
if err != nil {
93+
return Config{}, err
94+
}
95+
defer file.Close()
96+
return LoadConfigFromReader(file)
97+
}
98+
99+
func LoadConfigFromReader(r io.Reader) (Config, error) {
100+
data, err := io.ReadAll(r)
87101
if err != nil {
88102
return Config{}, err
89103
}
@@ -98,14 +112,50 @@ func LoadConfig(path string) (Config, error) {
98112
config.HealthCheckTimeout = 15
99113
}
100114

115+
// set default port ranges
116+
if config.StartPort == 0 {
117+
// default to 5800
118+
config.StartPort = 5800
119+
} else if config.StartPort < 1 {
120+
return Config{}, fmt.Errorf("startPort must be greater than 1")
121+
}
122+
101123
// Populate the aliases map
102124
config.aliases = make(map[string]string)
103125
for modelName, modelConfig := range config.Models {
104126
for _, alias := range modelConfig.Aliases {
127+
if _, found := config.aliases[alias]; found {
128+
return Config{}, fmt.Errorf("duplicate alias %s found in model: %s", alias, modelName)
129+
}
105130
config.aliases[alias] = modelName
106131
}
107132
}
108133

134+
// iterate over the models and replace any ${PORT} with the next available port
135+
// Get and sort all model IDs first, makes testing more consistent
136+
modelIds := make([]string, 0, len(config.Models))
137+
for modelId := range config.Models {
138+
modelIds = append(modelIds, modelId)
139+
}
140+
sort.Strings(modelIds) // This guarantees stable iteration order
141+
142+
// iterate over the sorted models
143+
nextPort := config.StartPort
144+
for _, modelId := range modelIds {
145+
modelConfig := config.Models[modelId]
146+
if strings.Contains(modelConfig.Cmd, "${PORT}") {
147+
modelConfig.Cmd = strings.ReplaceAll(modelConfig.Cmd, "${PORT}", strconv.Itoa(nextPort))
148+
if modelConfig.Proxy == "" {
149+
modelConfig.Proxy = fmt.Sprintf("http://localhost:%d", nextPort)
150+
} else {
151+
modelConfig.Proxy = strings.ReplaceAll(modelConfig.Proxy, "${PORT}", strconv.Itoa(nextPort))
152+
}
153+
nextPort++
154+
config.Models[modelId] = modelConfig
155+
} else if modelConfig.Proxy == "" {
156+
return Config{}, fmt.Errorf("model %s requires a proxy value when not using automatic ${PORT}", modelId)
157+
}
158+
}
109159
config = AddDefaultGroupToConfig(config)
110160
// check that members are all unique in the groups
111161
memberUsage := make(map[string]string) // maps member to group it appears in

proxy/config_test.go

Lines changed: 103 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package proxy
33
import (
44
"os"
55
"path/filepath"
6+
"strings"
67
"testing"
78

89
"github.com/stretchr/testify/assert"
@@ -43,6 +44,7 @@ models:
4344
checkEndpoint: "/"
4445
model4:
4546
cmd: path/to/cmd --arg1 one
47+
proxy: "http://localhost:8082"
4648
checkEndpoint: "/"
4749
4850
healthCheckTimeout: 15
@@ -73,6 +75,7 @@ groups:
7375
}
7476

7577
expected := Config{
78+
StartPort: 5800,
7679
Models: map[string]ModelConfig{
7780
"model1": {
7881
Cmd: "path/to/cmd --arg1 one",
@@ -97,6 +100,7 @@ groups:
97100
},
98101
"model4": {
99102
Cmd: "path/to/cmd --arg1 one",
103+
Proxy: "http://localhost:8082",
100104
CheckEndpoint: "/",
101105
},
102106
},
@@ -138,14 +142,6 @@ groups:
138142
}
139143

140144
func TestConfig_GroupMemberIsUnique(t *testing.T) {
141-
// Create a temporary YAML file for testing
142-
tempDir, err := os.MkdirTemp("", "test-config")
143-
if err != nil {
144-
t.Fatalf("Failed to create temporary directory: %v", err)
145-
}
146-
defer os.RemoveAll(tempDir)
147-
148-
tempFile := filepath.Join(tempDir, "config.yaml")
149145
content := `
150146
models:
151147
model1:
@@ -171,15 +167,35 @@ groups:
171167
exclusive: false
172168
members: ["model2"]
173169
`
170+
// Load the config and verify
171+
_, err := LoadConfigFromReader(strings.NewReader(content))
174172

175-
if err := os.WriteFile(tempFile, []byte(content), 0644); err != nil {
176-
t.Fatalf("Failed to write temporary file: %v", err)
177-
}
173+
// use a Contains check, as map iteration order is not guaranteed
174+
assert.Contains(t, err.Error(), "model member model2 is used in multiple groups:")
175+
}
178176

177+
func TestConfig_ModelAliasesAreUnique(t *testing.T) {
178+
content := `
179+
models:
180+
model1:
181+
cmd: path/to/cmd --arg1 one
182+
proxy: "http://localhost:8080"
183+
aliases:
184+
- m1
185+
model2:
186+
cmd: path/to/cmd --arg1 one
187+
proxy: "http://localhost:8081"
188+
checkEndpoint: "/"
189+
aliases:
190+
- m1
191+
- m2
192+
`
179193
// Load the config and verify
180-
_, err = LoadConfig(tempFile)
181-
assert.NotNil(t, err)
194+
_, err := LoadConfigFromReader(strings.NewReader(content))
182195

196+
// this is a Contains because it could be `model1` or `model2` depending on
197+
// the map iteration order Go chose
198+
assert.Contains(t, err.Error(), "duplicate alias m1 found in model: model")
183199
}
184200

185201
func TestConfig_ModelConfigSanitizedCommand(t *testing.T) {
@@ -269,3 +285,77 @@ func TestConfig_SanitizeCommand(t *testing.T) {
269285
assert.Error(t, err)
270286
assert.Nil(t, args)
271287
}
288+
289+
func TestConfig_AutomaticPortAssignments(t *testing.T) {
290+
291+
t.Run("Default Port Ranges", func(t *testing.T) {
292+
content := ``
293+
config, err := LoadConfigFromReader(strings.NewReader(content))
294+
if !assert.NoError(t, err) {
295+
t.Fatalf("Failed to load config: %v", err)
296+
}
297+
298+
assert.Equal(t, 5800, config.StartPort)
299+
})
300+
t.Run("User specific port ranges", func(t *testing.T) {
301+
content := `startPort: 1000`
302+
config, err := LoadConfigFromReader(strings.NewReader(content))
303+
if !assert.NoError(t, err) {
304+
t.Fatalf("Failed to load config: %v", err)
305+
}
306+
307+
assert.Equal(t, 1000, config.StartPort)
308+
})
309+
310+
t.Run("Invalid start port", func(t *testing.T) {
311+
content := `startPort: abcd`
312+
_, err := LoadConfigFromReader(strings.NewReader(content))
313+
assert.NotNil(t, err)
314+
})
315+
316+
t.Run("start port must be greater than 1", func(t *testing.T) {
317+
content := `startPort: -99`
318+
_, err := LoadConfigFromReader(strings.NewReader(content))
319+
assert.NotNil(t, err)
320+
})
321+
322+
t.Run("Automatic port assignments", func(t *testing.T) {
323+
content := `
324+
startPort: 5800
325+
models:
326+
model1:
327+
cmd: svr --port ${PORT}
328+
model2:
329+
cmd: svr --port ${PORT}
330+
proxy: "http://172.11.22.33:${PORT}"
331+
model3:
332+
cmd: svr --port 1999
333+
proxy: "http://1.2.3.4:1999"
334+
`
335+
config, err := LoadConfigFromReader(strings.NewReader(content))
336+
if !assert.NoError(t, err) {
337+
t.Fatalf("Failed to load config: %v", err)
338+
}
339+
340+
assert.Equal(t, 5800, config.StartPort)
341+
assert.Equal(t, "svr --port 5800", config.Models["model1"].Cmd)
342+
assert.Equal(t, "http://localhost:5800", config.Models["model1"].Proxy)
343+
344+
assert.Equal(t, "svr --port 5801", config.Models["model2"].Cmd)
345+
assert.Equal(t, "http://172.11.22.33:5801", config.Models["model2"].Proxy)
346+
347+
assert.Equal(t, "svr --port 1999", config.Models["model3"].Cmd)
348+
assert.Equal(t, "http://1.2.3.4:1999", config.Models["model3"].Proxy)
349+
350+
})
351+
352+
t.Run("Proxy value required if no ${PORT} in cmd", func(t *testing.T) {
353+
content := `
354+
models:
355+
model1:
356+
cmd: svr --port 111
357+
`
358+
_, err := LoadConfigFromReader(strings.NewReader(content))
359+
assert.Equal(t, "model model1 requires a proxy value when not using automatic ${PORT}", err.Error())
360+
})
361+
}

0 commit comments

Comments
 (0)