Skip to content

Commit 90adb3d

Browse files
committed
Adding pprof endpoints to metrics port
1 parent 4021279 commit 90adb3d

File tree

2 files changed

+41
-3
lines changed

2 files changed

+41
-3
lines changed

cmd/epp/runner/runner.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"flag"
2222
"fmt"
23+
"net/http/pprof"
2324

2425
"github.com/go-logr/logr"
2526
"github.com/prometheus/client_golang/prometheus"
@@ -215,6 +216,11 @@ func (r *Runner) Run(ctx context.Context) error {
215216
setupLog.Error(err, "Failed to create controller manager")
216217
return err
217218
}
219+
err = setupPprofHandlers(mgr)
220+
if err != nil {
221+
setupLog.Error(err, "Failed to setup pprof handlers")
222+
return err
223+
}
218224

219225
if len(*configText) != 0 || len(*configFile) != 0 {
220226
theConfig, err := loader.LoadConfig([]byte(*configText), *configFile)
@@ -403,3 +409,24 @@ func verifyMetricMapping(mapping backendmetrics.MetricMapping, logger logr.Logge
403409
logger.Info("Not scraping metric: LoraRequestInfo")
404410
}
405411
}
412+
413+
// setupPprofHandlers only implements the pre-defined profiles:
414+
// https://cs.opensource.google/go/go/+/refs/tags/go1.24.4:src/runtime/pprof/pprof.go;l=108
415+
func setupPprofHandlers(mgr ctrl.Manager) error {
416+
var err error
417+
profiles := []string{
418+
"heap",
419+
"goroutine",
420+
"allocs",
421+
"threadcreate",
422+
"block",
423+
"mutex",
424+
}
425+
for _, p := range profiles {
426+
err = mgr.AddMetricsServerExtraHandler("/debug/pprof/"+p, pprof.Handler(p))
427+
if err != nil {
428+
return err
429+
}
430+
}
431+
return nil
432+
}

site-src/guides/metrics.md

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# Metrics
1+
# Metrics & Observability
22

3-
This guide describes the current state of exposed metrics and how to scrape them.
3+
This guide describes the current state of exposed metrics and how to scrape them, as well as how to access pprof profiles.
44

55
## Requirements
66

@@ -53,7 +53,7 @@ This guide describes the current state of exposed metrics and how to scrape them
5353
|:---------------------------|:-----------------|:-------------------------------------------------|:------------------------------------------|:------------|
5454
| lora_syncer_adapter_status | Gauge | Status of LoRA adapters (1=loaded, 0=not_loaded) | `adapter_name`=<adapter-id> | ALPHA |
5555

56-
## Scrape Metrics
56+
## Scrape Metrics & Pprof profiles
5757

5858
The metrics endpoints are exposed on different ports by default:
5959

@@ -73,6 +73,7 @@ metadata:
7373
rules:
7474
- nonResourceURLs:
7575
- /metrics
76+
- /debug/pprof/*
7677
verbs:
7778
- get
7879
---
@@ -116,6 +117,16 @@ kubectl -n default port-forward inference-gateway-ext-proc-pod-name 9090
116117
curl -H "Authorization: Bearer $TOKEN" localhost:9090/metrics
117118
```
118119

120+
### Pprof profiles
121+
122+
Currently only the [predefined profiles](https://pkg.go.dev/runtime/pprof#Profile) are supported; CPU profiling will require code changes. Assuming the EPP has been port-forwarded as in the above example, to get the PNG display of the `heap` profile simply run:
123+
124+
```
125+
PROFILE_NAME=heap
126+
curl -H "Authorization: Bearer $TOKEN" localhost:9090/debug/pprof/$PROFILE_NAME -o profile.out
127+
go tool pprof -png profile.out
128+
```
129+
119130
## Prometheus Alerts
120131

121132
This section describes how to configure Prometheus alerts using collected metrics.

0 commit comments

Comments
 (0)