kubernetes-sigs · k8s-ci-robot · Jun 27, 2025 · Jun 15, 2025 · Jun 16, 2025 · Jun 25, 2025
diff --git a/Makefile b/Makefile
@@ -38,6 +38,10 @@ PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
 # The path to the E2E manifest file. It can be overridden by setting the
 # E2E_MANIFEST_PATH environment variable. Note that HF_TOKEN must be set when using the GPU-based manifest.
 E2E_MANIFEST_PATH ?= config/manifests/vllm/sim-deployment.yaml
+# E2E_USE_KIND is a flag used by the test-e2e target. When set to true, the e2e image is loaded into the kind cluster.
+# It is also possible to run the e2e tests against clusters other than kind. In that case, it is the user's responsibility
+# to load the image into the cluster.
+E2E_USE_KIND ?= true
 
 SYNCER_IMAGE_NAME := lora-syncer
 SYNCER_IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(SYNCER_IMAGE_NAME)
@@ -138,7 +142,7 @@ test-integration: ## Run integration tests.
 
 .PHONY: test-e2e
 test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster.
-	MANIFEST_PATH=$(PROJECT_DIR)/$(E2E_MANIFEST_PATH) ./hack/run-e2es.sh
+	MANIFEST_PATH=$(PROJECT_DIR)/$(E2E_MANIFEST_PATH) E2E_IMAGE=$(IMAGE_TAG) USE_KIND=$(E2E_USE_KIND) ./hack/test-e2e.sh
 
 .PHONY: lint
 lint: golangci-lint ## Run golangci-lint linter

diff --git a/hack/run-e2es.sh b/hack/run-e2es.sh
diff --git a/hack/test-e2e.sh b/hack/test-e2e.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Copyright 2025 The Kubernetes Authors.
 #
@@ -14,138 +14,41 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# This script verifies end-to-end connectivity for an example inference extension test environment based on
-# resources from the quickstart guide or e2e test framework. It can optionally launch a "curl" client pod to
-# run these tests within the cluster.
-#
-# USAGE: ./hack/e2e-test.sh
-#
-# OPTIONAL ENVIRONMENT VARIABLES:
-#   - TIME:     The duration (in seconds) for which the test will run. Defaults to 1 second.
-#   - CURL_POD: If set to "true", the script will use a Kubernetes pod named "curl" for making requests.
-#   - IP:       Override the detected IP address. If not provided, the script attempts to use a Gateway based on
-#               the quickstart guide or an Envoy service IP based on the e2e test framework.
-#   - PORT:     Override the detected port. If not provided, the script attempts to use a Gateway based on the
-#               quickstart guide or an Envoy service IP based on the e2e test framework.
-#
-# WHAT THE SCRIPT DOES:
-#   1. Determines if there is a Gateway named "inference-gateway" in the "default" namespace. If found, it extracts the IP
-#      address and port from the Gateway's "llm-gw" listener. Otherwise, it falls back to the Envoy service in the "default" namespace.
-#   2. Optionally checks for (or creates) a "curl" pod, ensuring it is ready to execute requests.
-#   3. Loops for $TIME seconds, sending requests every 5 seconds to the /v1/completions endpoint to confirm successful connectivity.
-
-set -euo pipefail
-
-# Determine the directory of this script and build an absolute path to client.yaml.
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-CLIENT_YAML="$SCRIPT_DIR/../test/testdata/client.yaml"
-
-# TIME is the amount of time, in seconds, to run the test.
-TIME=${TIME:-1}
-# Optionally use a client curl pod for executing the curl command.
-CURL_POD=${CURL_POD:-false}
-
-check_resource_exists() {
-    local type=$1
-    local name=$2
-    local namespace=$3
-
-    if kubectl get "$type" "$name" -n "$namespace" &>/dev/null; then
-         return 0
-    else
-         return 1
-    fi
+set -euox pipefail
+
+install_kind() {
+  # Install kind v0.29.0 (Linux amd64/arm64 only) into /usr/local/bin unless kind is already on PATH.
+  if ! command -v kind &>/dev/null; then
+    echo "kind not found, installing..."
+    # -f makes curl exit non-zero on an HTTP error instead of saving the error page as ./kind.
+    [ "$(uname -m)" = x86_64 ] && curl -fLo ./kind https://kind.sigs.k8s.io/dl/v0.29.0/kind-linux-amd64
+    [ "$(uname -m)" = aarch64 ] && curl -fLo ./kind https://kind.sigs.k8s.io/dl/v0.29.0/kind-linux-arm64
+    chmod +x ./kind
+    mv ./kind /usr/local/bin/kind
+  else
+    echo "kind is already installed."
+  fi
 }
 
-check_pod_ready() {
-    local pod_name=$1
-    local namespace=$2
-    # Check the Ready condition using jsonpath. Default to False if not found.
-    local ready_status
-    ready_status=$(kubectl get pod "$pod_name" -n "$namespace" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "False")
-    if [[ "$ready_status" == "True" ]]; then
-        return 0
-    else
-        return 1
-    fi
-}
-
-# Try to get the Gateway's IP and the port from the listener named "llm-gw" if it exists.
-if check_resource_exists "gateway" "inference-gateway" "default"; then
-    GATEWAY_IP=$(kubectl get gateway inference-gateway -n default -o jsonpath='{.status.addresses[0].value}')
-    # Use JSONPath to select the port from the listener with name "http"
-    GATEWAY_PORT=$(kubectl get gateway inference-gateway -n default -o jsonpath='{.spec.listeners[?(@.name=="http")].port}')
-else
-    GATEWAY_IP=""
-    GATEWAY_PORT=""
+if [ "${USE_KIND:-true}" = "true" ]; then # kind mode (also the default when USE_KIND is unset): load the e2e image into a kind cluster
+  install_kind # make sure kind cli is installed
+  if ! kubectl config current-context >/dev/null 2>&1; then # no active kubecontext: create a dedicated kind cluster
+    echo "No active kubecontext found. creating a kind cluster for running the tests..."
+    kind create cluster --name inference-e2e
+    KIND_CLUSTER=inference-e2e IMAGE_TAG=${E2E_IMAGE} make image-kind
+  else
+    current_context=$(kubectl config current-context)
+    current_kind_cluster="${current_context#kind-}" # strip the "kind-" context prefix; assumes the active context is a kind one
+    echo "Found an active kind cluster ${current_kind_cluster} for running the tests..."
+    KIND_CLUSTER=${current_kind_cluster} IMAGE_TAG=${E2E_IMAGE} make image-kind
+  fi
+else
+  # Don't use kind. It's the caller's responsibility to load the image into the cluster; we just run the tests.
+  # This section is useful when one wants to run an official release or latest main against a cluster other than kind.
+  if ! kubectl config current-context >/dev/null 2>&1; then # if no active cluster found
+    echo "No active kubecontext found. exiting..." >&2
+    exit 1 # a bare `exit` would return echo's status (0) and report success although no test ran
+  fi
 fi
 
-if [[ -n "$GATEWAY_IP" && -n "$GATEWAY_PORT" ]]; then
-    echo "Using Gateway inference-gateway IP and port from listener 'llm-gw'."
-    IP=${IP:-$GATEWAY_IP}
-    PORT=${PORT:-$GATEWAY_PORT}
-else
-    echo "Gateway inference-gateway not found or missing IP/port. Falling back to Envoy service."
-    # Ensure the Envoy service exists.
-    if ! check_resource_exists "svc" "envoy" "default"; then
-        echo "Error: Envoy service not found in namespace 'default'."
-        exit 1
-    fi
-    IP=${IP:-$(kubectl get svc envoy -n default -o jsonpath='{.spec.clusterIP}')}
-    PORT=${PORT:-$(kubectl get svc envoy -n default -o jsonpath='{.spec.ports[0].port}')}
-fi
-
-# Optionally verify that the curl pod exists and is ready.
-if [[ "$CURL_POD" == "true" ]]; then
-    if ! check_resource_exists "pod" "curl" "default"; then
-        echo "Pod 'curl' not found in namespace 'default'. Applying client.yaml from $CLIENT_YAML..."
-        kubectl apply -f "$CLIENT_YAML"
-    fi
-    echo "Waiting for pod 'curl' to be ready..."
-    # Retry every 5 seconds for up to 30 seconds (6 attempts)
-    for i in {1..6}; do
-        if check_pod_ready "curl" "default"; then
-            echo "Pod 'curl' is now ready."
-            break
-        fi
-        echo "Retry attempt $i: Pod 'curl' not ready; waiting 5 seconds..."
-        sleep 5
-    done
-
-    if ! check_pod_ready "curl" "default"; then
-        echo "Error: Pod 'curl' is still not ready in namespace 'default' after 30 seconds."
-        exit 1
-    fi
-fi
-
-# Validate that we have a non-empty IP and PORT.
-if [[ -z "$IP" ]]; then
-    echo "Error: Unable to determine a valid IP from either Gateway or Envoy service."
-    exit 1
-fi
-
-if [[ -z "$PORT" ]]; then
-    echo "Error: Unable to determine a valid port from either Gateway or Envoy service."
-    exit 1
-fi
-
-echo "Using IP: $IP"
-echo "Using PORT: $PORT"
-
-# Run the test for the specified duration.
-end=$((SECONDS + TIME))
-if [[ "$CURL_POD" == "true" ]]; then
-    while [ $SECONDS -lt $end ]; do
-        kubectl exec po/curl -- curl -i "$IP:$PORT/v1/completions" \
-            -H 'Content-Type: application/json' \
-            -d '{"model": "food-review","prompt": "Write as if you were a critic: San Francisco","max_tokens": 100,"temperature": 0}'
-        sleep 5
-    done
-else
-    while [ $SECONDS -lt $end ]; do
-        curl -i "$IP:$PORT/v1/completions" \
-            -H 'Content-Type: application/json' \
-            -d '{"model": "food-review","prompt": "Write as if you were a critic: San Francisco","max_tokens": 100,"temperature": 0}'
-        sleep 5
-    done
-fi
+echo "Found an active cluster. Running Go e2e tests in ./epp..."
+go test ./test/e2e/epp/ -v -ginkgo.v
diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
@@ -87,13 +87,14 @@ const (
 )
 
 var (
-	ctx context.Context
+	ctx = context.Background()
 	cli client.Client
 	// Required for exec'ing in curl pod
-	kubeCli *kubernetes.Clientset
-	scheme  = runtime.NewScheme()
-	cfg     = config.GetConfigOrDie()
-	nsName  string
+	kubeCli  *kubernetes.Clientset
+	scheme   = runtime.NewScheme()
+	cfg      = config.GetConfigOrDie()
+	nsName   string
+	e2eImage string
 )
 
 func TestAPIs(t *testing.T) {
@@ -108,6 +109,8 @@ var _ = ginkgo.BeforeSuite(func() {
 	if nsName == "" {
 		nsName = defaultNsName
 	}
+	e2eImage = os.Getenv("E2E_IMAGE")
+	gomega.Expect(e2eImage).NotTo(gomega.BeEmpty(), "E2E_IMAGE environment variable is not set")
 
 	ginkgo.By("Setting up the test suite")
 	setupSuite()
@@ -117,9 +120,12 @@ var _ = ginkgo.BeforeSuite(func() {
 })
 
 func setupInfra() {
+	// readModelServerManifestPath ensures the ModelServer manifest path exists.
+	// Run it before createNamespace to fail fast in case it doesn't.
+	modelServerManifestPath := readModelServerManifestPath()
+
 	createNamespace(cli, nsName)
 
-	modelServerManifestPath := readModelServerManifestPath()
 	modelServerManifestArray := getYamlsFromModelServerManifest(modelServerManifestPath)
 	if strings.Contains(modelServerManifestArray[0], "hf-token") {
 		createHfSecret(cli, modelServerSecretManifest)
@@ -134,7 +140,8 @@ func setupInfra() {
 	createClient(cli, clientManifest)
 	createEnvoy(cli, envoyManifest)
 	// Run this step last, as it requires additional time for the model server to become ready.
-	createModelServer(cli, modelServerManifestArray, modelServerManifestPath)
+	ginkgo.By("Creating model server resources from manifest: " + modelServerManifestPath)
+	createModelServer(cli, modelServerManifestArray)
 }
 
 var _ = ginkgo.AfterSuite(func() {
@@ -145,7 +152,6 @@ var _ = ginkgo.AfterSuite(func() {
 // setupSuite initializes the test suite by setting up the Kubernetes client,
 // loading required API schemes, and validating configuration.
 func setupSuite() {
-	ctx = context.Background()
 	gomega.ExpectWithOffset(1, cfg).NotTo(gomega.BeNil())
 
 	err := clientgoscheme.AddToScheme(scheme)
@@ -167,6 +173,10 @@ func setupSuite() {
 }
 
 func cleanupResources() {
+	if cli == nil {
+		return // could happen if BeforeSuite had an error
+	}
+
 	gomega.Expect(testutils.DeleteClusterResources(ctx, cli)).To(gomega.Succeed())
 	gomega.Expect(testutils.DeleteNamespacedResources(ctx, cli, nsName)).To(gomega.Succeed())
 }
@@ -260,8 +270,7 @@ func createClient(k8sClient client.Client, filePath string) {
 }
 
 // createModelServer creates the model server resources used for testing from the given filePaths.
-func createModelServer(k8sClient client.Client, modelServerManifestArray []string, deployPath string) {
-	ginkgo.By("Creating model server resources from manifest: " + deployPath)
+func createModelServer(k8sClient client.Client, modelServerManifestArray []string) {
 	createObjsFromYaml(k8sClient, modelServerManifestArray)
 
 	// Wait for the deployment to exist.
@@ -332,10 +341,14 @@ func createEnvoy(k8sClient client.Client, filePath string) {
 // createInferExt creates the inference extension resources used for testing from the given filePath.
 func createInferExt(k8sClient client.Client, filePath string) {
 	inManifests := readYaml(filePath)
-	ginkgo.By("Replacing placeholder namespace with E2E_NS environment variable")
+	ginkgo.By("Replacing placeholders with environment variables")
 	outManifests := []string{}
-	for _, m := range inManifests {
-		outManifests = append(outManifests, strings.ReplaceAll(m, "$E2E_NS", nsName))
+	for _, manifest := range inManifests {
+		replacer := strings.NewReplacer(
+			"$E2E_NS", nsName,
+			"$E2E_IMAGE", e2eImage,
+		)
+		outManifests = append(outManifests, replacer.Replace(manifest))
 	}
 
 	ginkgo.By("Creating inference extension resources from manifest: " + filePath)

diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml
@@ -47,8 +47,8 @@ spec:
       terminationGracePeriodSeconds: 130
       containers:
       - name: epp
-        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
-        imagePullPolicy: Always
+        image: $E2E_IMAGE
+        imagePullPolicy: IfNotPresent
         args:
         - -poolName
         - "vllm-llama3-8b-instruct"