kubernetes-sigs · k8s-ci-robot · Jun 21, 2025 · Jun 9, 2025 · Jun 10, 2025 · Jun 11, 2025
diff --git a/conformance/conformance.go b/conformance/conformance.go
@@ -25,6 +25,7 @@ import (
 	"io/fs"
 	"os"
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/require"
 	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
@@ -153,6 +154,8 @@ func DefaultOptions(t *testing.T) confsuite.ConformanceOptions {
 
 	baseManifestsValue := "resources/manifests/manifests.yaml"
 
+	config := confconfig.DefaultTimeoutConfig()
+	config.HTTPRouteMustHaveCondition = 300 * time.Second
 	opts := confsuite.ConformanceOptions{
 		Client:               c,
 		ClientOptions:        clientOptions,
@@ -163,7 +166,7 @@ func DefaultOptions(t *testing.T) confsuite.ConformanceOptions {
 		Debug:                *confflags.ShowDebug,
 		CleanupBaseResources: *confflags.CleanupBaseResources,
 		SupportedFeatures:    sets.New[features.FeatureName](),
-		TimeoutConfig:        confconfig.DefaultTimeoutConfig(),
+		TimeoutConfig:        config,
 		SkipTests:            skipTests,
 		ExemptFeatures:       exemptFeatures,
 		RunTest:              *confflags.RunTest,

diff --git a/conformance/tests/basic/gateway_following_epp_routing.go b/conformance/tests/basic/gateway_following_epp_routing.go
@@ -0,0 +1,162 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package basic
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+	"k8s.io/apimachinery/pkg/types" // For NamespacedName
+	"sigs.k8s.io/gateway-api/conformance/utils/suite"
+	"sigs.k8s.io/gateway-api/pkg/features" // For standard feature names
+
+	// Import the tests package to append to ConformanceTests
+	"sigs.k8s.io/gateway-api-inference-extension/conformance/tests"
+	"sigs.k8s.io/gateway-api-inference-extension/conformance/utils/config"
+	k8sutils "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/kubernetes"
+	trafficutils "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/traffic"
+)
+
+func init() {
+	// Register the GatwayFollowingEPPRouting test case by appending it to the
+	// shared tests.ConformanceTests list at package initialization.
+	tests.ConformanceTests = append(tests.ConformanceTests, GatwayFollowingEPPRouting)
+}
+
+// GatwayFollowingEPPRouting defines the test case verifying that the gateway routes requests to the endpoint selected by the EPP.
+var GatwayFollowingEPPRouting = suite.ConformanceTest{
+	ShortName:   "GatwayFollowingEPPRouting",
+	Description: "Inference gateway should redirect traffic to an endpoints belonging to what EPP respond endpoints list",
+	Manifests:   []string{"tests/basic/gateway_following_epp_routing.yaml"},
+	Features: []features.FeatureName{
+		features.FeatureName("SupportInferencePool"),
+		features.SupportGateway,
+	},
+	Test: func(t *testing.T, s *suite.ConformanceTestSuite) {
+		const (
+			appBackendNamespace = "gateway-conformance-app-backend"
+			infraNamespace      = "gateway-conformance-infra"
+			hostname            = "primary.example.com"
+			path                = "/primary-gateway-test"
+		)
+
+		httpRouteNN := types.NamespacedName{Name: "httproute-for-primary-gw", Namespace: appBackendNamespace}
+		gatewayNN := types.NamespacedName{Name: "conformance-gateway", Namespace: infraNamespace}
+		poolNN := types.NamespacedName{Name: "normal-gateway-pool", Namespace: appBackendNamespace}
+		backendPodLabels := map[string]string{"app": "infra-backend"}
+
+		k8sutils.HTTPRouteMustBeAcceptedAndResolved(t, s.Client, s.TimeoutConfig, httpRouteNN, gatewayNN)
+		k8sutils.InferencePoolMustBeAcceptedByParent(t, s.Client, poolNN)
+		gwAddr := k8sutils.GetGatewayEndpoint(t, s.Client, s.TimeoutConfig, gatewayNN)
+
+		backendPodIP, err := k8sutils.GetOnePodIPWithLabel(t, s.Client, appBackendNamespace, backendPodLabels)
+		require.NoError(t, err, "Failed to get backend Pod IP address")
+
+		inferenceTimeoutConfig := config.DefaultInferenceExtensionTimeoutConfig()
+		// TODO: replace this fixed sleep with a poll-and-check for readiness.
+		t.Log("Waiting for the httpRoute and inferecePool ready to serve traffic.")
+		time.Sleep(inferenceTimeoutConfig.WaitForHttpRouteAndInferencePoolReadyTimeout)
+
+		correctRequestBody := `{
+            "model": "conformance-fake-model",
+			"prompt": "Write as if you were a critic: San Francisc"
+        }`
+
+		t.Run("Gateway should route traffic to a valid endpoint specified by EPP", func(t *testing.T) {
+			t.Logf("Sending request to %s with EPP header routing to valid IP %s", gwAddr, backendPodIP)
+			eppHeader := map[string]string{"test-epp-endpoint-selection": backendPodIP}
+
+			trafficutils.MakeRequestAndExpectSuccessV2(
+				t,
+				s.RoundTripper,
+				s.TimeoutConfig,
+				gwAddr,
+				hostname,
+				path,
+				"infra-backend-deployment", // This might be better as a constant if used often
+				appBackendNamespace,
+				eppHeader,
+				correctRequestBody,
+				"POST",
+			)
+		})
+
+		t.Run("Gateway should route traffic specified by EPP even an invalidIP and should get response with error code 429", func(t *testing.T) {
+			invalidIP := "256.256.256.256" // An IP that cannot be a real endpoint
+			t.Logf("Sending request to %s with EPP header routing to invalid IP %s", gwAddr, invalidIP)
+			eppHeader := map[string]string{"test-epp-endpoint-selection": invalidIP}
+
+			trafficutils.MakeRequestAndExpectTooManyRequest(
+				t,
+				s.RoundTripper,
+				s.TimeoutConfig,
+				gwAddr,
+				hostname,
+				path,
+				"infra-backend-deployment",
+				appBackendNamespace,
+				eppHeader,
+				correctRequestBody,
+				"POST",
+			)
+		})
+
+		t.Run("Gateway should reject request that is missing the model name and return 400 response", func(t *testing.T) {
+			requestBodyWithoutModel := `{"prompt": "Write as if you were a critic: San Francisc"}`
+			eppHeader := map[string]string{"test-epp-endpoint-selection": backendPodIP}
+			t.Logf("Sending request to %s with a malformed body (missing model)", gwAddr)
+
+			trafficutils.MakeRequestAndExpectBadRequest(
+				t,
+				s.RoundTripper,
+				s.TimeoutConfig,
+				gwAddr,
+				hostname,
+				path,
+				"infra-backend-deployment",
+				appBackendNamespace,
+				eppHeader,
+				requestBodyWithoutModel,
+				"POST",
+			)
+		})
+
+		t.Run("Gateway should reject request that is with a nonexist model name and return 404 response", func(t *testing.T) {
+			requestBodyNonExistModel := `{
+            	"model": "non-exist-model",
+				"prompt": "Write as if you were a critic: San Francisc"
+        	}`
+			eppHeader := map[string]string{"test-epp-endpoint-selection": backendPodIP}
+			t.Logf("Sending request to %s with a malformed body (nonexist model)", gwAddr)
+
+			trafficutils.MakeRequestAndExpectNotFoundV2(
+				t,
+				s.RoundTripper,
+				s.TimeoutConfig,
+				gwAddr,
+				hostname,
+				path,
+				"infra-backend-deployment",
+				appBackendNamespace,
+				eppHeader,
+				requestBodyNonExistModel,
+				"POST",
+			)
+		})
+	},
+}
diff --git a/conformance/tests/basic/gateway_following_epp_routing.yaml b/conformance/tests/basic/gateway_following_epp_routing.yaml
@@ -0,0 +1,203 @@
+# conformance/tests/basic/gateway_following_epp_routing.yaml
+
+# This manifest defines the initial resources for the
+# gateway_following_epp_routing.go conformance test.
+
+# --- Backend Deployment (using standard Gateway API echoserver) ---
+# This Deployment provides Pods for the InferencePool to select.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: infra-backend-deployment
+  namespace: gateway-conformance-app-backend
+  labels:
+    app: infra-backend
+spec:
+  selector:
+    matchLabels:
+      app: infra-backend
+  template:
+    metadata:
+      labels:
+        app: infra-backend
+    spec:
+      containers:
+      - name: echoserver
+        image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
+        ports:
+        - containerPort: 3000
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 3000
+          initialDelaySeconds: 3
+          periodSeconds: 5
+          failureThreshold: 2
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+---
+# --- InferenceModel Definition ---
+# Maps the model name conformance-fake-model to the normal-gateway-pool InferencePool.
+apiVersion: inference.networking.x-k8s.io/v1alpha2
+kind: InferenceModel
+metadata:
+  name: conformance-fake-model-server
+  namespace: gateway-conformance-app-backend
+spec:
+  modelName: conformance-fake-model
+  criticality: Critical # Mark it as critical to bypass the saturation check, since the model server is fake and doesn't expose such metrics.
+  poolRef:
+    name: normal-gateway-pool
+---
+# --- InferencePool Definition ---
+apiVersion: inference.networking.x-k8s.io/v1alpha2
+kind: InferencePool
+metadata:
+  name: normal-gateway-pool
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    app: "infra-backend"
+  targetPortNumber: 3000
+  extensionRef:
+    name: infra-backend-endpoint-picker
+---
+# --- HTTPRoute for Primary Gateway (conformance-gateway) ---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: httproute-for-primary-gw
+  namespace: gateway-conformance-app-backend
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: conformance-gateway
+    namespace: gateway-conformance-infra
+    sectionName: http
+  hostnames:
+  - "primary.example.com"
+  rules:
+  - backendRefs:
+    - group: inference.networking.x-k8s.io
+      kind: InferencePool
+      name: normal-gateway-pool
+    matches:
+    - path:
+        type: PathPrefix
+        value: /primary-gateway-test
+---
+# --- Conformance EPP service Definition ---
+apiVersion: v1
+kind: Service
+metadata:
+  name: infra-backend-endpoint-picker
+  namespace: gateway-conformance-app-backend
+spec:
+  selector:
+    app: infra-backend-epp
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002
+      appProtocol: http2
+  type: ClusterIP
+---
+# --- Conformance EPP Deployment ---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: infra-backend-epp
+  namespace: gateway-conformance-app-backend
+  labels:
+    app: infra-backend-epp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: infra-backend-epp
+  template:
+    metadata:
+      labels:
+        app: infra-backend-epp
+    spec:
+      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
+      terminationGracePeriodSeconds: 130
+      containers:
+      - name: epp
+        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
+        imagePullPolicy: Always
+        args:
+        - -poolName
+        - "normal-gateway-pool"
+        - -poolNamespace
+        - "gateway-conformance-app-backend"
+        - -v
+        - "4"
+        - --zap-encoder
+        - "json"
+        - -grpcPort
+        - "9002"
+        - -grpcHealthPort
+        - "9003"
+        env:
+        - name: USE_STREAMING
+          value: "true"
+        - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
+          value: "true"
+        ports:
+        - containerPort: 9002
+        - containerPort: 9003
+        - name: metrics
+          containerPort: 9090
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+---
+# --- Conformance EPP Required Role and RoleBindings ---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: inference-model-reader
+  namespace: gateway-conformance-app-backend
+rules:
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencemodels", "inferencepools"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: epp-to-inference-model-reader
+  namespace: gateway-conformance-app-backend
+subjects:
+- kind: ServiceAccount
+  name: default
+  namespace: gateway-conformance-app-backend
+roleRef:
+  kind: Role
+  name: inference-model-reader
+  apiGroup: rbac.authorization.k8s.io