kubernetes-sigs · k8s-ci-robot · Apr 8, 2025 · Apr 8, 2025 · nirrozenbaum · Apr 8, 2025
diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml
@@ -1,3 +1,6 @@
+# Note: If you change this file, please make the same change to the copy used by the e2e tests:
+#
+# https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/test/testdata/inferencepool-e2e.yaml
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferencePool
 metadata:

diff --git a/config/manifests/vllm/cpu-deployment.yaml b/config/manifests/vllm/cpu-deployment.yaml
@@ -113,5 +113,8 @@ data:
         ensureExist:
           models:
           - base-model: Qwen/Qwen2.5-1.5B
-            id: food-review-1
+            id: food-review
+            source: SriSanth2345/Qwen-1.5B-Tweet-Generations
+          - base-model: Qwen/Qwen2.5-1.5B
+            id: cad-fabricator
             source: SriSanth2345/Qwen-1.5B-Tweet-Generations
diff --git a/test/e2e/epp/README.md b/test/e2e/epp/README.md
@@ -28,6 +28,13 @@ Follow these steps to run the end-to-end tests:
    export HF_TOKEN=<MY_HF_TOKEN>
    ```
 
+1. **(Optional) Set the test namespace**: By default, the e2e tests create their resources in the `inf-ext-e2e` namespace.
+   To use a different namespace, set the following environment variable:
+
+   ```sh
+   export E2E_NS=<MY_NS>
+   ```
+
 1. **Run the Tests**: Run the `test-e2e` target:
 
    ```sh

diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
@@ -30,6 +30,7 @@ import (
 	corev1 "k8s.io/api/core/v1"
 	rbacv1 "k8s.io/api/rbac/v1"
 	apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/serializer"
@@ -55,9 +56,8 @@ const (
 	defaultInterval = time.Millisecond * 250
 	// defaultCurlInterval is the default interval to run the test curl command.
 	defaultCurlInterval = time.Second * 5
-	// nsName is the name of the Namespace used for tests.
-	// TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed
-	nsName = "default"
+	// defaultNsName is the default name of the Namespace used for tests. It can be overridden with the E2E_NS environment variable.
+	defaultNsName = "inf-ext-e2e"
 	// modelServerName is the name of the model server test resources.
 	modelServerName = "vllm-llama3-8b-instruct"
 	// modelName is the test model name.
@@ -77,7 +77,7 @@ const (
 	// inferModelManifest is the manifest for the inference model CRD.
 	inferModelManifest = "../../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml"
 	// inferExtManifest is the manifest for the inference extension test resources.
-	inferExtManifest = "../../../config/manifests/inferencepool-resources.yaml"
+	inferExtManifest = "../../testdata/inferencepool-e2e.yaml"
 	// envoyManifest is the manifest for the envoy proxy test resources.
 	envoyManifest = "../../testdata/envoy.yaml"
 	// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.
@@ -91,6 +91,7 @@ var (
 	kubeCli *kubernetes.Clientset
 	scheme  = runtime.NewScheme()
 	cfg     = config.GetConfigOrDie()
+	nsName  string
 )
 
 func TestAPIs(t *testing.T) {
@@ -101,6 +102,11 @@ func TestAPIs(t *testing.T) {
 }
 
 var _ = ginkgo.BeforeSuite(func() {
+	nsName = os.Getenv("E2E_NS")
+	if nsName == "" {
+		nsName = defaultNsName
+	}
+
 	ginkgo.By("Setting up the test suite")
 	setupSuite()
 
@@ -109,6 +115,8 @@ var _ = ginkgo.BeforeSuite(func() {
 })
 
 func setupInfra() {
+	createNamespace(cli, nsName)
+
 	modelServerManifestPath := readModelServerManifestPath()
 	modelServerManifestArray := getYamlsFromModelServerManifest(modelServerManifestPath)
 	if strings.Contains(modelServerManifestArray[0], "hf-token") {
@@ -118,6 +126,7 @@ func setupInfra() {
 		"inferencepools.inference.networking.x-k8s.io":  inferPoolManifest,
 		"inferencemodels.inference.networking.x-k8s.io": inferModelManifest,
 	}
+
 	createCRDs(cli, crds)
 	createInferExt(cli, inferExtManifest)
 	createClient(cli, clientManifest)
@@ -182,6 +191,17 @@ var (
 	curlInterval      = defaultCurlInterval
 )
 
+// createNamespace creates the Namespace named ns that holds the e2e test resources.
+// A namespace that already exists is tolerated, so the suite can be re-run or pointed
+// at a pre-existing namespace; any other creation error fails the running spec.
+func createNamespace(k8sClient client.Client, ns string) {
+	ginkgo.By("Creating e2e namespace: " + ns)
+	obj := &corev1.Namespace{ObjectMeta: v1.ObjectMeta{Name: ns}}
+	// AlreadyExists is not a failure here: the goal is only that the namespace is present.
+	err := client.IgnoreAlreadyExists(k8sClient.Create(ctx, obj))
+	gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to create e2e test namespace")
+}
+
 // namespaceExists ensures that a specified namespace exists and is ready for use.
 func namespaceExists(k8sClient client.Client, ns string) {
 	ginkgo.By("Ensuring namespace exists: " + ns)
@@ -276,8 +296,15 @@ func createHfSecret(k8sClient client.Client, secretPath string) {
 
 // createEnvoy creates the envoy proxy resources used for testing from the given filePath.
 func createEnvoy(k8sClient client.Client, filePath string) {
+	inManifests := readYaml(filePath)
+	ginkgo.By("Replacing placeholder namespace with E2E_NS environment variable")
+	outManifests := []string{}
+	for _, m := range inManifests {
+		outManifests = append(outManifests, strings.ReplaceAll(m, "$E2E_NS", nsName))
+	}
+
 	ginkgo.By("Creating envoy proxy resources from manifest: " + filePath)
-	applyYAMLFile(k8sClient, filePath)
+	createObjsFromYaml(k8sClient, outManifests)
 
 	// Wait for the configmap to exist before proceeding with test.
 	cfgMap := &corev1.ConfigMap{}
@@ -302,8 +329,15 @@ func createEnvoy(k8sClient client.Client, filePath string) {
 
 // createInferExt creates the inference extension resources used for testing from the given filePath.
 func createInferExt(k8sClient client.Client, filePath string) {
+	inManifests := readYaml(filePath)
+	ginkgo.By("Replacing placeholder namespace with E2E_NS environment variable")
+	outManifests := []string{}
+	for _, m := range inManifests {
+		outManifests = append(outManifests, strings.ReplaceAll(m, "$E2E_NS", nsName))
+	}
+
 	ginkgo.By("Creating inference extension resources from manifest: " + filePath)
-	applyYAMLFile(k8sClient, filePath)
+	createObjsFromYaml(k8sClient, outManifests)
 
 	// Wait for the clusterrole to exist.
 	testutils.EventuallyExists(ctx, func() error {

diff --git a/test/testdata/envoy.yaml b/test/testdata/envoy.yaml
@@ -100,7 +100,7 @@ data:
                           grpc_service:
                             envoy_grpc:
                               cluster_name: ext_proc
-                              authority: vllm-llama3-8b-instruct-epp.default:9002
+                              authority: vllm-llama3-8b-instruct-epp.$E2E_NS:9002
                             timeout: 10s
                           processing_mode:
                             request_header_mode: SEND
@@ -195,7 +195,7 @@ data:
                   - endpoint:
                       address:
                         socket_address:
-                          address: vllm-llama3-8b-instruct-epp.default
+                          address: vllm-llama3-8b-instruct-epp.$E2E_NS
                           port_value: 9002
                     health_status: HEALTHY
                     load_balancing_weight: 1
@@ -225,7 +225,7 @@ spec:
         image: docker.io/envoyproxy/envoy:distroless-v1.33.2
         args:
           - "--service-cluster" 
-          - "default/inference-gateway"
+          - "$E2E_NS/inference-gateway"
           - "--service-node"
           - "$(ENVOY_POD_NAME)"
           - "--log-level"

diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml
@@ -0,0 +1,126 @@
+apiVersion: inference.networking.x-k8s.io/v1alpha2
+kind: InferencePool
+metadata:
+  labels:
+  name: vllm-llama3-8b-instruct
+spec:
+  targetPortNumber: 8000
+  selector:
+    app: vllm-llama3-8b-instruct
+  extensionRef:
+    name: vllm-llama3-8b-instruct-epp
+    namespace: $E2E_NS
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vllm-llama3-8b-instruct-epp
+  namespace: $E2E_NS
+spec:
+  selector:
+    app: vllm-llama3-8b-instruct-epp
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002
+      appProtocol: http2
+  type: ClusterIP
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vllm-llama3-8b-instruct-epp
+  namespace: $E2E_NS
+  labels:
+    app: vllm-llama3-8b-instruct-epp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vllm-llama3-8b-instruct-epp
+  template:
+    metadata:
+      labels:
+        app: vllm-llama3-8b-instruct-epp
+    spec:
+      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
+      terminationGracePeriodSeconds: 130
+      containers:
+      - name: epp
+        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
+        imagePullPolicy: Always
+        args:
+        - -poolName
+        - "vllm-llama3-8b-instruct"
+        - -poolNamespace
+        - "$E2E_NS"
+        - -v
+        - "4"
+        - --zap-encoder
+        - "json"
+        - -grpcPort
+        - "9002"
+        - -grpcHealthPort
+        - "9003"
+        env:
+        - name: USE_STREAMING
+          value: "true"
+        ports:
+        - containerPort: 9002
+        - containerPort: 9003
+        - name: metrics
+          containerPort: 9090
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pod-read
+rules:
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencemodels"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencepools"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["discovery.k8s.io"]
+  resources: ["endpointslices"]
+  verbs: ["get", "watch", "list"]
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
+--- 
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pod-read-binding
+subjects:
+- kind: ServiceAccount
+  name: default
+  namespace: $E2E_NS
+roleRef:
+  kind: ClusterRole
+  name: pod-read