kubernetes-sigs · k8s-ci-robot · Jul 21, 2025 · Jul 21, 2025
diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go
@@ -61,24 +61,24 @@ type ExtProcServerRunner struct {
 
 // Default values for CLI flags in main
 const (
-	DefaultGrpcPort                                 = 9002                             // default for --grpcPort
-	DefaultGrpcHealthPort                           = 9003                             // default for --grpcHealthPort
-	DefaultMetricsPort                              = 9090                             // default for --metricsPort
+	DefaultGrpcPort                                 = 9002                             // default for --grpc-port
+	DefaultGrpcHealthPort                           = 9003                             // default for --grpc-health-port
+	DefaultMetricsPort                              = 9090                             // default for --metrics-port
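-	DefaultDestinationEndpointHintMetadataNamespace = "envoy.lb"                       // default for --destinationEndpointHintMetadataNamespace
+	DefaultDestinationEndpointHintMetadataNamespace = "envoy.lb"                       // default for --destination-endpoint-hint-metadata-namespace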
-	DefaultDestinationEndpointHintKey               = "x-gateway-destination-endpoint" // default for --destinationEndpointHintKey
+	DefaultDestinationEndpointHintKey               = "x-gateway-destination-endpoint" // default for --destination-endpoint-hint-key
 	DefaultPoolName                                 = ""                               // required but no default
-	DefaultPoolNamespace                            = "default"                        // default for --poolNamespace
-	DefaultRefreshMetricsInterval                   = 50 * time.Millisecond            // default for --refreshMetricsInterval
-	DefaultRefreshPrometheusMetricsInterval         = 5 * time.Second                  // default for --refreshPrometheusMetricsInterval
-	DefaultSecureServing                            = true                             // default for --secureServing
-	DefaultHealthChecking                           = false                            // default for --healthChecking
-	DefaultEnablePprof                              = true                             // default for --enablePprof
-	DefaultTotalQueuedRequestsMetric                = "vllm:num_requests_waiting"      // default for --totalQueuedRequestsMetric
-	DefaultKvCacheUsagePercentageMetric             = "vllm:gpu_cache_usage_perc"      // default for --kvCacheUsagePercentageMetric
-	DefaultLoraInfoMetric                           = "vllm:lora_requests_info"        // default for --loraInfoMetric
-	DefaultCertPath                                 = ""                               // default for --certPath
-	DefaultConfigFile                               = ""                               // default for --configFile
-	DefaultConfigText                               = ""                               // default for --configText
+	DefaultPoolNamespace                            = "default"                        // default for --pool-namespace
+	DefaultRefreshMetricsInterval                   = 50 * time.Millisecond            // default for --refresh-metrics-interval
+	DefaultRefreshPrometheusMetricsInterval         = 5 * time.Second                  // default for --refresh-prometheus-metrics-interval
+	DefaultSecureServing                            = true                             // default for --secure-serving
+	DefaultHealthChecking                           = false                            // default for --health-checking
+	DefaultEnablePprof                              = true                             // default for --enable-pprof
+	DefaultTotalQueuedRequestsMetric                = "vllm:num_requests_waiting"      // default for --total-queued-requests-metric
+	DefaultKvCacheUsagePercentageMetric             = "vllm:gpu_cache_usage_perc"      // default for --kv-cache-usage-percentage-metric
+	DefaultLoraInfoMetric                           = "vllm:lora_requests_info"        // default for --lora-info-metric
+	DefaultCertPath                                 = ""                               // default for --cert-path
+	DefaultConfigFile                               = ""                               // default for --config-file
+	DefaultConfigText                               = ""                               // default for --config-text
 )
 
 // NewDefaultExtProcServerRunner creates a runner with default values.

diff --git a/site-src/guides/epp-configuration/config-text.md b/site-src/guides/epp-configuration/config-text.md
@@ -1,6 +1,6 @@
 # Configuring Plugins via text
 
-The set of lifecycle hooks (plugins) that are used by the Inference Gateway (IGW) is determined by how 
+The set of lifecycle hooks (plugins) that are used by the Inference Gateway (IGW) is determined by how
 it is configured. The IGW can be configured in several ways, either by code or via text.
 
 If configured by code either a set of predetermined environment variables must be used or one must
@@ -95,7 +95,7 @@ schedulingProfiles:
     weight: 50
 ```
 
-If the configuration is in a file, the EPP command line argument `--configFile`
+If the configuration is in a file, the EPP command line argument `--config-file`
 should be used to specify the full path of the file in question. For example:
 
 ```yaml
@@ -115,14 +115,14 @@ spec:
         image: ghcr.io/llm-d/llm-d-inference-scheduler:latest
         imagePullPolicy: IfNotPresent
         args:
-        - -poolName
+        - --pool-name
         - "${POOL_NAME}"
         ...
-        - --configFile
+        - --config-file
         - "/etc/epp/epp-config.yaml"
 ```
 
-If the configuration is passed as in-line text the EPP command line argument `--configText`
+If the configuration is passed as in-line text the EPP command line argument `--config-text`
 should be used. For example:
 
 ```yaml
@@ -142,10 +142,10 @@ spec:
         image: ghcr.io/llm-d/llm-d-inference-scheduler:latest
         imagePullPolicy: IfNotPresent
         args:
-        - -poolName
+        - --pool-name
         - "${POOL_NAME}"
         ...
-        - --configText
+        - --config-text
         - |
           apiVersion: inference.networking.x-k8s.io/v1alpha1
           kind: EndpointPickerConfig
@@ -194,7 +194,7 @@ number of pods, and finds the pods that fall into the first range.
 
 #### **LoraAffinityFilter**
 
-Implements a pod selection strategy that when the use of a LoRA adapter is requested, prioritizes pods 
+Implements a pod selection strategy that, when the use of a LoRA adapter is requested, prioritizes pods
 that are believed to have the specific LoRA adapter loaded. It also allows for load balancing through
 some randomization.
 
@@ -252,4 +252,4 @@ waiting queue size the pod has, the higher the score it will get (since it's mor
 available to serve new request).
 
 - *Type*: queue-scorer
-- *Parameters*: none
+- *Parameters*: none
diff --git a/site-src/guides/index.md b/site-src/guides/index.md
@@ -164,7 +164,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
          ./istioctl install --set tag=$TAG --set hub=gcr.io/istio-testing
          ```
 
-      3. If you run the Endpoint Picker (EPP) with the `--secureServing` flag set to `true` (the default mode), it is currently using a self-signed certificate. As a security measure, Istio does not trust self-signed certificates by default. As a temporary workaround, you can apply the destination rule to bypass TLS verification for EPP. A more secure TLS implementation in EPP is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
+      3. If you run the Endpoint Picker (EPP) with the `--secure-serving` flag set to `true` (the default mode), it is currently using a self-signed certificate. As a security measure, Istio does not trust self-signed certificates by default. As a temporary workaround, you can apply the destination rule to bypass TLS verification for EPP. A more secure TLS implementation in EPP is being discussed in [Issue 582](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582).
 
          ```bash
          kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/istio/destination-rule.yaml

diff --git a/site-src/guides/inferencepool-rollout.md b/site-src/guides/inferencepool-rollout.md
@@ -253,19 +253,19 @@ spec:
         image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
         imagePullPolicy: Always
         args:
-        - -poolName
+        - --pool-name
         - "vllm-llama3-8b-instruct-new"
-        - -poolNamespace
+        - --pool-namespace
         - "default"
-        - -v
+        - --v
         - "4"
         - --zap-encoder
         - "json"
-        - -grpcPort
+        - --grpc-port
         - "9002"
-        - -grpcHealthPort
+        - --grpc-health-port
         - "9003"
-        - -configFile
+        - --config-file
         - "/config/default-plugins.yaml"
         ports:
         - containerPort: 9002
@@ -468,4 +468,4 @@ kubectl delete Deployment vllm-llama3-8b-instruct-epp --ignore-not-found
 kubectl delete Service vllm-llama3-8b-instruct-epp --ignore-not-found
 ```
 
-With this, all requests should be served by the new Inference Pool.
+With this, all requests should be served by the new Inference Pool.
diff --git a/site-src/implementations/model-servers.md b/site-src/implementations/model-servers.md
@@ -27,12 +27,12 @@ Use `--set inferencePool.modelServerType=triton-tensorrt-llm` to install the [`i
 ### Option 2: Edit EPP deployment yaml
 
  Add the following to the `args` of the [EPP deployment](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/42eb5ff1c5af1275df43ac384df0ddf20da95134/config/manifests/inferencepool-resources.yaml#L32)
- 
+
  ```
-- -totalQueuedRequestsMetric
+- --total-queued-requests-metric
 - "nv_trt_llm_request_metrics{request_type=waiting}"
-- -kvCacheUsagePercentageMetric
+- --kv-cache-usage-percentage-metric
 - "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}"
-- -loraInfoMetric
+- --lora-info-metric
 - "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet.
-```
+```