Skip to content

Commit ef22f40

Browse files
Venkat2811 and ModiCodeCraftsman
authored and committed
[FEATURE]: metrics server support for gateway plugins & dashboard (vllm-project#1211)
Signed-off-by: Venkat Raman <vraman2811@gmail.com>
Signed-off-by: Modi Tamam <modi.tamam@gmail.com>
1 parent 108208b commit ef22f40

File tree

10 files changed

+538
-16
lines changed

10 files changed

+538
-16
lines changed

cmd/plugins/main.go

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright 2024 The Aibrix Team.
2+
Copyright 2025 The Aibrix Team.
33
44
Licensed under the Apache License, Version 2.0 (the "License");
55
you may not use this file except in compliance with the License.
@@ -18,7 +18,6 @@ package main
1818

1919
import (
2020
"flag"
21-
"fmt"
2221
"net"
2322
"net/http"
2423
"os"
@@ -41,16 +40,17 @@ import (
4140
)
4241

4342
var (
44-
grpc_port int
43+
grpcAddr string
44+
metricsAddr string
4545
)
4646

4747
func main() {
48-
flag.IntVar(&grpc_port, "port", 50052, "gRPC port")
48+
flag.StringVar(&grpcAddr, "grpc-bind-address", ":50052", "The address the gRPC server binds to.")
49+
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
4950
klog.InitFlags(flag.CommandLine)
5051
defer klog.Flush()
5152
flag.Parse()
5253

53-
// Connect to Redis
5454
redisClient := utils.GetRedisClient()
5555
defer func() {
5656
if err := redisClient.Close(); err != nil {
@@ -79,14 +79,12 @@ func main() {
7979

8080
cache.InitForGateway(config, stopCh, redisClient)
8181

82-
// Connect to K8s cluster
8382
k8sClient, err := kubernetes.NewForConfig(config)
8483
if err != nil {
8584
klog.Fatalf("Error creating kubernetes client: %v", err)
8685
}
8786

88-
// grpc server init
89-
lis, err := net.Listen("tcp", fmt.Sprintf(":%d", grpc_port))
87+
lis, err := net.Listen("tcp", grpcAddr)
9088
if err != nil {
9189
klog.Fatalf("failed to listen: %v", err)
9290
}
@@ -95,27 +93,34 @@ func main() {
9593
klog.Fatalf("Error on creating gateway k8s client: %v", err)
9694
}
9795

96+
gatewayServer := gateway.NewServer(redisClient, k8sClient, gatewayK8sClient)
97+
98+
if err := gatewayServer.StartMetricsServer(metricsAddr); err != nil {
99+
klog.Fatalf("Failed to start metrics server: %v", err)
100+
}
101+
klog.Infof("Started metrics server on %s", metricsAddr)
102+
98103
s := grpc.NewServer()
99-
extProcPb.RegisterExternalProcessorServer(s, gateway.NewServer(redisClient, k8sClient, gatewayK8sClient))
104+
extProcPb.RegisterExternalProcessorServer(s, gatewayServer)
100105

101106
healthCheck := health.NewServer()
102107
healthPb.RegisterHealthServer(s, healthCheck)
103108
healthCheck.SetServingStatus("gateway-plugin", healthPb.HealthCheckResponse_SERVING)
104109

105-
klog.Info("starting gRPC server on port :50052")
110+
klog.Info("starting gRPC server on " + grpcAddr)
106111

107112
go func() {
108113
if err := http.ListenAndServe("localhost:6060", nil); err != nil {
109114
klog.Fatalf("failed to setup profiling: %v", err)
110115
}
111116
}()
112117

113-
// shutdown
114118
var gracefulStop = make(chan os.Signal, 1)
115119
signal.Notify(gracefulStop, syscall.SIGINT, syscall.SIGTERM)
116120
go func() {
117121
sig := <-gracefulStop
118122
klog.Warningf("signal received: %v, initiating graceful shutdown...", sig)
123+
gatewayServer.Shutdown()
119124
s.GracefulStop()
120125
os.Exit(0)
121126
}()

config/gateway/gateway-plugin/gateway-plugin.yaml

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ kind: Service
33
metadata:
44
name: gateway-plugins
55
namespace: aibrix-system
6+
labels:
7+
app: gateway-plugins
8+
annotations:
9+
prometheus.io/scrape: "true"
10+
prometheus.io/port: "8080"
11+
prometheus.io/path: "/metrics"
612
spec:
713
selector:
814
app: gateway-plugins
@@ -15,6 +21,10 @@ spec:
1521
protocol: TCP
1622
port: 6060
1723
targetPort: 6060
24+
- name: metrics
25+
protocol: TCP
26+
port: 8080
27+
targetPort: 8080
1828
---
1929
apiVersion: apps/v1
2030
kind: Deployment
@@ -70,6 +80,8 @@ spec:
7080
containerPort: 50052
7181
- name: profiling
7282
containerPort: 6060
83+
- name: metrics
84+
containerPort: 8080
7385
resources:
7486
limits:
7587
cpu: 1
@@ -186,6 +198,6 @@ spec:
186198
processingMode:
187199
request:
188200
body: Buffered
189-
response:
201+
response:
190202
body: Streamed
191-
messageTimeout: 5s
203+
messageTimeout: 5s

config/prometheus/monitor.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ metadata:
77
app.kubernetes.io/name: aibrix
88
app.kubernetes.io/managed-by: kustomize
99
name: controller-manager-metrics-monitor
10-
namespace: system
10+
namespace: aibrix-system
1111
spec:
1212
endpoints:
1313
- path: /metrics

config/test/gateway/kustomization.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
apiVersion: kustomize.config.k8s.io/v1beta1
2-
kind: Kustomization
1+
apiVersion: kustomize.config.k8s.io/v1alpha1
2+
kind: Component
33

44
patches:
55
- path: vtc-test-env-patch.yaml

config/test/gateway/vtc-test-env-patch.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ apiVersion: apps/v1
22
kind: Deployment
33
metadata:
44
name: aibrix-gateway-plugins
5+
namespace: aibrix-system
56
spec:
67
template:
78
spec:

config/test/kustomization.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,10 @@ kind: Kustomization
33

44
resources:
55
- ../default
6+
7+
patches:
8+
- path: gateway/vtc-test-env-patch.yaml
9+
target:
10+
kind: Deployment
11+
name: aibrix-gateway-plugins
12+
namespace: aibrix-system

0 commit comments

Comments
 (0)