Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions cmd/epp/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ var (
"metricsPort",
runserver.DefaultMetricsPort,
"The metrics port")
enablePprof = flag.Bool(
"enablePprof",
runserver.DefaultEnablePprof,
"Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
destinationEndpointHintKey = flag.String(
"destinationEndpointHintKey",
runserver.DefaultDestinationEndpointHintKey,
Expand Down Expand Up @@ -263,10 +267,14 @@ func (r *Runner) Run(ctx context.Context) error {
setupLog.Error(err, "Failed to create controller manager")
return err
}
err = setupPprofHandlers(mgr)
if err != nil {
setupLog.Error(err, "Failed to setup pprof handlers")
return err

if *enablePprof {
setupLog.Info("Enabling pprof handlers")
err = setupPprofHandlers(mgr)
if err != nil {
setupLog.Error(err, "Failed to setup pprof handlers")
return err
}
}

err = r.parseConfiguration(ctx)
Expand Down
2 changes: 2 additions & 0 deletions config/charts/inferencepool/templates/epp-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ spec:
- "9003"
- -metricsPort
- "9090"
# Boolean flags must be passed as -flag=value; the space-separated form (-flag value) only works for non-bool flags.
# See https://pkg.go.dev/flag#hdr-Command_line_flag_syntax
- "-enablePprof={{ .Values.inferenceExtension.enablePprof }}"
{{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }}
- -totalQueuedRequestsMetric
- "nv_trt_llm_request_metrics{request_type=waiting}"
Expand Down
1 change: 1 addition & 0 deletions config/charts/inferencepool/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ inferenceExtension:
pullPolicy: Always
extProcPort: 9002
env: {}
enablePprof: true # Enable pprof handlers for profiling and debugging
# Example environment variables for the `env` key:
# env:
# KV_CACHE_SCORE_WEIGHT: "1"
Expand Down
1 change: 1 addition & 0 deletions pkg/epp/server/runserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ const (
DefaultRefreshPrometheusMetricsInterval = 5 * time.Second // default for --refreshPrometheusMetricsInterval
DefaultSecureServing = true // default for --secureServing
DefaultHealthChecking = false // default for --healthChecking
DefaultEnablePprof = true // default for --enablePprof
DefaultTotalQueuedRequestsMetric = "vllm:num_requests_waiting" // default for --totalQueuedRequestsMetric
DefaultKvCacheUsagePercentageMetric = "vllm:gpu_cache_usage_perc" // default for --kvCacheUsagePercentageMetric
DefaultLoraInfoMetric = "vllm:lora_requests_info" // default for --loraInfoMetric
Expand Down