vllm-project · varungup90 · Aug 1, 2025 · Aug 1, 2025 · Aug 1, 2025 · gemini-code-assist
diff --git a/config/gateway/gateway-plugin/gateway-plugin.yaml b/config/gateway/gateway-plugin/gateway-plugin.yaml
@@ -104,6 +104,8 @@ spec:
               value: "16"
             - name: AIBRIX_PREFIX_CACHE_STANDARD_DEVIATION_FACTOR
               value: "2"
+            - name: AIBRIX_PREFILL_REQUEST_TIMEOUT
+              value: "60"
             # Uncomment to enable request tracing for GPU optimizer, default "false".
             # - name: AIBRIX_GPU_OPTIMIZER_TRACING_FLAG
             #   value: "true"
@@ -200,4 +202,4 @@ spec:
           body: Buffered
         response:
           body: Streamed
-      messageTimeout: 5s
+      messageTimeout: 60s
diff --git a/dist/chart/values.yaml b/dist/chart/values.yaml
@@ -67,11 +67,12 @@ gatewayPlugin:
       AIBRIX_PREFIX_CACHE_BLOCK_SIZE: "128"
       AIBRIX_PREFIX_CACHE_POD_RUNNING_REQUEST_IMBALANCE_ABS_COUNT: "16"
       AIBRIX_PREFIX_CACHE_STANDARD_DEVIATION_FACTOR: "2"
+      AIBRIX_PREFILL_REQUEST_TIMEOUT: "60"
   dependencies:
     redis:
       host: aibrix-redis-master
       port: 6379
-  messageTimeout: "5s"
+  messageTimeout: "60s"
 
 
 gpuOptimizer:

diff --git a/dist/chart/vke.yaml b/dist/chart/vke.yaml
@@ -52,11 +52,12 @@ gatewayPlugin:
       AIBRIX_PREFIX_CACHE_BLOCK_SIZE: "128"
       AIBRIX_PREFIX_CACHE_POD_RUNNING_REQUEST_IMBALANCE_ABS_COUNT: "16"
       AIBRIX_PREFIX_CACHE_STANDARD_DEVIATION_FACTOR: "2"
+      AIBRIX_PREFILL_REQUEST_TIMEOUT: "60"
   dependencies:
     redis:
       host: aibrix-redis-master
       port: 6379
-  messageTimeout: "5s"
+  messageTimeout: "60s"
 
 
 gpuOptimizer:

diff --git a/pkg/plugins/gateway/algorithms/pd_disaggregation.go b/pkg/plugins/gateway/algorithms/pd_disaggregation.go
@@ -45,6 +45,11 @@ const (
 	PDRoleIdentifier              string                 = "role-name"
 	RoleReplicaIndex              string                 = "stormservice.orchestration.aibrix.ai/role-replica-index"
 	PodGroupIndex                 string                 = "stormservice.orchestration.aibrix.ai/pod-group-index"
+	defaultPrefillRequestTimeout  int                    = 30
+)
+
+var (
+	prefillRequestTimeout int = utils.LoadEnvInt("AIBRIX_PREFILL_REQUEST_TIMEOUT", defaultPrefillRequestTimeout)
 )
 
 func init() {
@@ -239,7 +244,7 @@ func (r *pdRouter) executeHTTPRequest(url string, routingCtx *types.RoutingConte
 	req.Header.Set("content-length", strconv.Itoa(len(payload)))
 
 	// Execute with timeout
-	client := &http.Client{Timeout: 30 * time.Second}
+	client := &http.Client{Timeout: time.Duration(prefillRequestTimeout) * time.Second}
 	resp, err := client.Do(req)
 	if err != nil {
 		return fmt.Errorf("failed to execute http prefill request: %w", err)

diff --git a/pkg/plugins/gateway/algorithms/prefix_cache_test.go b/pkg/plugins/gateway/algorithms/prefix_cache_test.go
@@ -18,7 +18,6 @@ package routingalgorithms
 
 import (
 	"context"
-	"log"
 	"slices"
 	"testing"
 
@@ -150,8 +149,8 @@ func Test_PrefixCacheE2E(t *testing.T) {
 	}
 	ctx7 := types.NewRoutingContext(context.Background(), RouterPrefixCache, "m1", input, "r7", "")
 	p1, err := prefixCacheRouter.Route(ctx7, podList)
-	log.Println(p2, p3, p4)
-	log.Println(p1)
+	t.Log(p2, p3, p4)
+	t.Log(p1)
 	assert.NoError(t, err)
 	assert.False(t, slices.Contains([]string{p2, p3, p4}, p1))
 }