Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cmd/bbr/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@ import (

var (
grpcPort = flag.Int(
"grpcPort",
"grpc-port",
9004,
"The gRPC port used for communicating with Envoy proxy")
grpcHealthPort = flag.Int(
"grpcHealthPort",
"grpc-health-port",
9005,
"The port used for gRPC liveness and readiness probes")
metricsPort = flag.Int(
"metricsPort", 9090, "The metrics port")
"metrics-port", 9090, "The metrics port")
streaming = flag.Bool(
"streaming", false, "Enables streaming support for Envoy full-duplex streaming mode")
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
Expand Down
58 changes: 29 additions & 29 deletions cmd/epp/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,91 +52,91 @@ import (

var (
grpcPort = flag.Int(
"grpcPort",
"grpc-port",
runserver.DefaultGrpcPort,
"The gRPC port used for communicating with Envoy proxy")
grpcHealthPort = flag.Int(
"grpcHealthPort",
"grpc-health-port",
runserver.DefaultGrpcHealthPort,
"The port used for gRPC liveness and readiness probes")
metricsPort = flag.Int(
"metricsPort",
"metrics-port",
runserver.DefaultMetricsPort,
"The metrics port")
enablePprof = flag.Bool(
"enablePprof",
"enable-pprof",
runserver.DefaultEnablePprof,
"Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
destinationEndpointHintKey = flag.String(
"destinationEndpointHintKey",
"destination-endpoint-hint-key",
runserver.DefaultDestinationEndpointHintKey,
"Header and response metadata key used by Envoy to route to the appropriate pod. This must match Envoy configuration.")
destinationEndpointHintMetadataNamespace = flag.String(
"DestinationEndpointHintMetadataNamespace",
"destination-endpoint-hint-metadata-namespace",
runserver.DefaultDestinationEndpointHintMetadataNamespace,
"The key for the outer namespace struct in the metadata field of the extproc response that is used to wrap the "+
"target endpoint. If not set, then an outer namespace struct should not be created.")
poolName = flag.String(
"poolName",
"pool-name",
runserver.DefaultPoolName,
"Name of the InferencePool this Endpoint Picker is associated with.")
poolNamespace = flag.String(
"poolNamespace",
"pool-namespace",
runserver.DefaultPoolNamespace,
"Namespace of the InferencePool this Endpoint Picker is associated with.")
refreshMetricsInterval = flag.Duration(
"refreshMetricsInterval",
"refresh-metrics-interval",
runserver.DefaultRefreshMetricsInterval,
"interval to refresh metrics")
refreshPrometheusMetricsInterval = flag.Duration(
"refreshPrometheusMetricsInterval",
"refresh-prometheus-metrics-interval",
runserver.DefaultRefreshPrometheusMetricsInterval,
"interval to flush prometheus metrics")
logVerbosity = flag.Int(
"v",
logging.DEFAULT,
"number for the log level verbosity")
secureServing = flag.Bool(
"secureServing",
"secure-serving",
runserver.DefaultSecureServing,
"Enables secure serving. Defaults to true.")
healthChecking = flag.Bool(
"healthChecking",
"health-checking",
runserver.DefaultHealthChecking,
"Enables health checking")
certPath = flag.String(
"certPath",
"cert-path",
runserver.DefaultCertPath,
"The path to the certificate for secure serving. The certificate and private key files "+
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secure-serving is enabled, "+
"then a self-signed certificate is used.")
// metric flags
totalQueuedRequestsMetric = flag.String(
"totalQueuedRequestsMetric",
"total-queued-requests-metric",
runserver.DefaultTotalQueuedRequestsMetric,
"Prometheus metric for the number of queued requests.")
kvCacheUsagePercentageMetric = flag.String(
"kvCacheUsagePercentageMetric",
"kv-cache-usage-percentage-metric",
runserver.DefaultKvCacheUsagePercentageMetric,
"Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1).")
// LoRA metrics
loraInfoMetric = flag.String(
"loraInfoMetric",
"lora-info-metric",
runserver.DefaultLoraInfoMetric,
"Prometheus metric for the LoRA info metrics (must be in vLLM label format).")
// configuration flags
configFile = flag.String(
"configFile",
"config-file",
runserver.DefaultConfigFile,
"The path to the configuration file")
configText = flag.String(
"configText",
"config-text",
runserver.DefaultConfigText,
"The configuration specified as text, in lieu of a file")

modelServerMetricsPort = flag.Int("modelServerMetricsPort", 0, "Port to scrape metrics from pods. "+
modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+
"Default value will be set to InferencePool.Spec.TargetPortNumber if not set.")
modelServerMetricsPath = flag.String("modelServerMetricsPath", "/metrics", "Path to scrape metrics from pods")
modelServerMetricsPath = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods")

setupLog = ctrl.Log.WithName("setup")
)
Expand Down Expand Up @@ -167,16 +167,16 @@ func (r *Runner) WithSchedulerConfig(schedulerConfig *scheduling.SchedulerConfig
func bindEnvToFlags() {
// map[ENV_VAR]flagName – add more as needed
for env, flg := range map[string]string{
"GRPC_PORT": "grpcPort",
"GRPC_HEALTH_PORT": "grpcHealthPort",
"MODEL_SERVER_METRICS_PORT": "modelServerMetricsPort",
"MODEL_SERVER_METRICS_PATH": "modelServerMetricsPath",
"DESTINATION_ENDPOINT_HINT_KEY": "destinationEndpointHintKey",
"POOL_NAME": "poolName",
"POOL_NAMESPACE": "poolNamespace",
"GRPC_PORT": "grpc-port",
"GRPC_HEALTH_PORT": "grpc-health-port",
"MODEL_SERVER_METRICS_PORT": "model-server-metrics-port",
"MODEL_SERVER_METRICS_PATH": "model-server-metrics-path",
"DESTINATION_ENDPOINT_HINT_KEY": "destination-endpoint-hint-key",
"POOL_NAME": "pool-name",
"POOL_NAMESPACE": "pool-namespace",
// durations & bools work too; flag.Set expects the *string* form
"REFRESH_METRICS_INTERVAL": "refreshMetricsInterval",
"SECURE_SERVING": "secureServing",
"REFRESH_METRICS_INTERVAL": "refresh-metrics-interval",
"SECURE_SERVING": "secure-serving",
} {
if v := os.Getenv(env); v != "" {
// ignore error; Parse() will catch invalid values later
Expand Down
4 changes: 2 additions & 2 deletions config/charts/body-based-routing/templates/bbr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ spec:
image: {{ .Values.bbr.image.hub }}/{{ .Values.bbr.image.name }}:{{ .Values.bbr.image.tag }}
imagePullPolicy: {{ .Values.bbr.image.pullPolicy | default "Always" }}
args:
- "-streaming"
- "-v"
- "--streaming"
- "--v"
- "3"
ports:
- containerPort: {{ .Values.bbr.port }}
Expand Down
22 changes: 11 additions & 11 deletions config/charts/inferencepool/templates/epp-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,28 @@ spec:
image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
args:
- -poolName
- --pool-name
- {{ .Release.Name }}
- -poolNamespace
- --pool-namespace
- {{ .Release.Namespace }}
- -v
- --v
- "3"
- -grpcPort
- --grpc-port
- "9002"
- -grpcHealthPort
- --grpc-health-port
- "9003"
- -metricsPort
- --metrics-port
- "9090"
- -configFile
- --config-file
- "config/{{ .Values.inferenceExtension.pluginsConfigFile }}"
# https://pkg.go.dev/flag#hdr-Command_line_flag_syntax: bool flags must be passed as --flag=value; the space-separated form is only for non-bool flags
- "-enablePprof={{ .Values.inferenceExtension.enablePprof }}"
- "--enable-pprof={{ .Values.inferenceExtension.enablePprof }}"
{{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }}
- -totalQueuedRequestsMetric
- --total-queued-requests-metric
- "nv_trt_llm_request_metrics{request_type=waiting}"
- -kvCacheUsagePercentageMetric
- --kv-cache-usage-percentage-metric
- "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}"
- -loraInfoMetric
- --lora-info-metric
- "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet.
{{- end }}
ports:
Expand Down
12 changes: 6 additions & 6 deletions config/manifests/inferencepool-resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,19 @@ spec:
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
imagePullPolicy: Always
args:
- -poolName
- --pool-name
- "vllm-llama3-8b-instruct"
- "-poolNamespace"
- "--pool-namespace"
- "default"
- -v
- --v
- "4"
- --zap-encoder
- "json"
- -grpcPort
- --grpc-port
- "9002"
- -grpcHealthPort
- --grpc-health-port
- "9003"
- "-configFile"
- "--config-file"
- "/config/default-plugins.yaml"
ports:
- containerPort: 9002
Expand Down
24 changes: 12 additions & 12 deletions conformance/resources/manifests/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -199,19 +199,19 @@ spec:
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
imagePullPolicy: Always
args:
- -poolName
- --pool-name
- "primary-inference-pool"
- -poolNamespace
- --pool-namespace
- "gateway-conformance-app-backend"
- -v
- --v
- "4"
- --zap-encoder
- "json"
- -grpcPort
- --grpc-port
- "9002"
- -grpcHealthPort
- --grpc-health-port
- "9003"
- "-configFile"
- "--config-file"
- "/config/conformance-plugins.yaml"
ports:
- containerPort: 9002
Expand Down Expand Up @@ -293,19 +293,19 @@ spec:
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
imagePullPolicy: Always
args:
- -poolName
- --pool-name
- "secondary-inference-pool"
- -poolNamespace
- --pool-namespace
- "gateway-conformance-app-backend"
- -v
- --v
- "4"
- --zap-encoder
- "json"
- -grpcPort
- --grpc-port
- "9002"
- -grpcHealthPort
- --grpc-health-port
- "9003"
- "-configFile"
- "--config-file"
- "/config/conformance-plugins.yaml"
ports:
- containerPort: 9002
Expand Down
12 changes: 6 additions & 6 deletions test/testdata/inferencepool-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,19 @@ spec:
image: $E2E_IMAGE
imagePullPolicy: IfNotPresent
args:
- -poolName
- --pool-name
- "vllm-llama3-8b-instruct"
- -poolNamespace
- --pool-namespace
- "$E2E_NS"
- -v
- --v
- "4"
- --zap-encoder
- "json"
- -grpcPort
- --grpc-port
- "9002"
- -grpcHealthPort
- --grpc-health-port
- "9003"
- "-configFile"
- "--config-file"
- "/config/default-plugins.yaml"
ports:
- containerPort: 9002
Expand Down