Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
# The path to the E2E manifest file. It can be overridden by setting the
# E2E_MANIFEST_PATH environment variable. Note that HF_TOKEN must be set when using the GPU-based manifest.
E2E_MANIFEST_PATH ?= config/manifests/vllm/sim-deployment.yaml
# E2E_USE_KIND is a flag used by the test-e2e target. When set to true, the e2e image is loaded into the kind cluster.
# It is also possible to run the e2e tests against clusters other than kind; in that case, it is the user's
# responsibility to load the image into the cluster.
E2E_USE_KIND ?= true

SYNCER_IMAGE_NAME := lora-syncer
SYNCER_IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(SYNCER_IMAGE_NAME)
Expand Down Expand Up @@ -138,7 +142,7 @@ test-integration: ## Run integration tests.

.PHONY: test-e2e
test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster.
MANIFEST_PATH=$(PROJECT_DIR)/$(E2E_MANIFEST_PATH) ./hack/run-e2es.sh
MANIFEST_PATH=$(PROJECT_DIR)/$(E2E_MANIFEST_PATH) E2E_IMAGE=$(IMAGE_TAG) USE_KIND=$(E2E_USE_KIND) ./hack/test-e2e.sh

.PHONY: lint
lint: golangci-lint ## Run golangci-lint linter
Expand Down
43 changes: 0 additions & 43 deletions hack/run-e2es.sh

This file was deleted.

167 changes: 35 additions & 132 deletions hack/test-e2e.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash

# Copyright 2025 The Kubernetes Authors.
#
Expand All @@ -14,138 +14,41 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# This script verifies end-to-end connectivity for an example inference extension test environment based on
# resources from the quickstart guide or e2e test framework. It can optionally launch a "curl" client pod to
# run these tests within the cluster.
#
# USAGE: ./hack/test-e2e.sh
#
# OPTIONAL ENVIRONMENT VARIABLES:
# - TIME: The duration (in seconds) for which the test will run. Defaults to 1 second.
# - CURL_POD: If set to "true", the script will use a Kubernetes pod named "curl" for making requests.
# - IP: Override the detected IP address. If not provided, the script attempts to use a Gateway based on
# the quickstart guide or an Envoy service IP based on the e2e test framework.
# - PORT: Override the detected port. If not provided, the script attempts to use a Gateway based on the
# quickstart guide or an Envoy service IP based on the e2e test framework.
#
# WHAT THE SCRIPT DOES:
# 1. Determines if there is a Gateway named "inference-gateway" in the "default" namespace. If found, it extracts the IP
# address and port from the Gateway's "http" listener. Otherwise, it falls back to the Envoy service in the "default" namespace.
# 2. Optionally checks for (or creates) a "curl" pod, ensuring it is ready to execute requests.
# 3. Loops for $TIME seconds, sending requests every 5 seconds to the /v1/completions endpoint to confirm successful connectivity.

set -euo pipefail

# Determine the directory of this script and build an absolute path to client.yaml.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
CLIENT_YAML="$SCRIPT_DIR/../test/testdata/client.yaml"

# TIME is the amount of time, in seconds, to run the test.
TIME=${TIME:-1}
# Optionally use a client curl pod for executing the curl command.
CURL_POD=${CURL_POD:-false}

check_resource_exists() {
local type=$1
local name=$2
local namespace=$3

if kubectl get "$type" "$name" -n "$namespace" &>/dev/null; then
return 0
else
return 1
fi
set -euox pipefail

# install_kind makes sure the `kind` CLI is available on PATH.
#
# When `kind` is already installed this is a no-op. Otherwise the v0.29.0 release
# binary matching the host OS and CPU architecture is downloaded into the current
# directory and moved to /usr/local/bin/kind (the caller must be able to write there).
#
# Returns non-zero with an explicit message when the host OS or architecture has no
# matching release binary, instead of failing later on a missing ./kind file.
install_kind() {
  if command -v kind &>/dev/null; then
    echo "kind is already installed."
    return 0
  fi

  echo "kind not found, installing..."
  local kind_version="v0.29.0"
  local os arch

  case "$(uname -s)" in
    Linux) os="linux" ;;
    Darwin) os="darwin" ;;
    *)
      echo "Error: unsupported OS '$(uname -s)'; please install kind manually." >&2
      return 1
      ;;
  esac

  case "$(uname -m)" in
    x86_64 | amd64) arch="amd64" ;;
    aarch64 | arm64) arch="arm64" ;;
    *)
      echo "Error: unsupported architecture '$(uname -m)'; please install kind manually." >&2
      return 1
      ;;
  esac

  # -f makes curl fail on HTTP errors rather than saving an error page as the binary.
  curl -fLo ./kind "https://kind.sigs.k8s.io/dl/${kind_version}/kind-${os}-${arch}"
  chmod +x ./kind
  mv ./kind /usr/local/bin/kind
}

# check_pod_ready reports whether a pod's "Ready" condition is "True".
#
# Arguments:
#   $1 - pod name
#   $2 - namespace
# Returns:
#   0 when the Ready condition status is "True", 1 otherwise. A failing kubectl
#   lookup is treated as "False".
check_pod_ready() {
  local pod=$1
  local ns=$2
  local condition
  # Fall back to "False" when kubectl itself fails (e.g. the pod does not exist).
  condition=$(kubectl get pod "$pod" -n "$ns" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "False")
  [[ "$condition" == "True" ]]
}

# Try to get the Gateway's IP and the port from the listener named "http" if it exists.
if check_resource_exists "gateway" "inference-gateway" "default"; then
GATEWAY_IP=$(kubectl get gateway inference-gateway -n default -o jsonpath='{.status.addresses[0].value}')
# Use JSONPath to select the port from the listener with name "http"
GATEWAY_PORT=$(kubectl get gateway inference-gateway -n default -o jsonpath='{.spec.listeners[?(@.name=="http")].port}')
else
GATEWAY_IP=""
GATEWAY_PORT=""
# Prepare the cluster the e2e tests will run against.
#
# USE_KIND=true (the default, matching the Makefile's E2E_USE_KIND): make sure the kind
# CLI is installed, create a kind cluster named "inference-e2e" when there is no active
# kubecontext, and load the E2E_IMAGE into the cluster via `make image-kind`.
# Any other value: only verify that a kubecontext is active; loading the image into the
# cluster is the caller's responsibility.
if [ "${USE_KIND:-true}" = "true" ]; then
  install_kind # make sure kind cli is installed
  if ! kubectl config current-context >/dev/null 2>&1; then # no active kubecontext found
    echo "No active kubecontext found. creating a kind cluster for running the tests..."
    kind create cluster --name inference-e2e
    KIND_CLUSTER=inference-e2e IMAGE_TAG=${E2E_IMAGE} make image-kind
  else
    current_context=$(kubectl config current-context)
    # kind names its kubecontexts "kind-<cluster>"; strip the prefix to get the cluster name.
    current_kind_cluster="${current_context#kind-}"
    if [ "${current_kind_cluster}" = "${current_context}" ]; then
      echo "Warning: kubecontext '${current_context}' does not look like a kind context; loading the image may fail. Set E2E_USE_KIND=false for non-kind clusters." >&2
    fi
    echo "Found an active kind cluster ${current_kind_cluster} for running the tests..."
    KIND_CLUSTER=${current_kind_cluster} IMAGE_TAG=${E2E_IMAGE} make image-kind
  fi
else
  # Don't use kind. It is the caller's responsibility to load the image into the cluster; we just run the tests.
  # This path is useful for running an official release or latest main against a cluster other than kind.
  if ! kubectl config current-context >/dev/null 2>&1; then # no active cluster found
    echo "No active kubecontext found. exiting..."
    # Exit non-zero: a bare `exit` would propagate echo's status (0) and report success.
    exit 1
  fi
fi

# Resolve the IP and PORT used for the connectivity requests.
# Values already present in the IP / PORT environment variables take precedence;
# otherwise they come from the Gateway (when both its address and port were found)
# or, failing that, from the "envoy" service in the "default" namespace.
if [[ -n "$GATEWAY_IP" && -n "$GATEWAY_PORT" ]]; then
  # The port was read from the listener named "http", so report that name.
  echo "Using Gateway inference-gateway IP and port from listener 'http'."
  IP=${IP:-$GATEWAY_IP}
  PORT=${PORT:-$GATEWAY_PORT}
else
  echo "Gateway inference-gateway not found or missing IP/port. Falling back to Envoy service."
  # Ensure the Envoy service exists.
  if ! check_resource_exists "svc" "envoy" "default"; then
    echo "Error: Envoy service not found in namespace 'default'."
    exit 1
  fi
  # The kubectl lookups only run when the corresponding override is unset or empty.
  IP=${IP:-$(kubectl get svc envoy -n default -o jsonpath='{.spec.clusterIP}')}
  PORT=${PORT:-$(kubectl get svc envoy -n default -o jsonpath='{.spec.ports[0].port}')}
fi

# When CURL_POD is "true", make sure the "curl" client pod exists in the "default"
# namespace and wait for it to become Ready before sending any requests.
if [[ "$CURL_POD" == "true" ]]; then
  # Create the pod from client.yaml when it is not there yet.
  if ! check_resource_exists "pod" "curl" "default"; then
    echo "Pod 'curl' not found in namespace 'default'. Applying client.yaml from $CLIENT_YAML..."
    kubectl apply -f "$CLIENT_YAML"
  fi
  echo "Waiting for pod 'curl' to be ready..."

  # Poll readiness: 6 attempts, 5 seconds apart (30 seconds in total).
  attempt=1
  while [ "$attempt" -le 6 ]; do
    if check_pod_ready "curl" "default"; then
      echo "Pod 'curl' is now ready."
      break
    fi
    echo "Retry attempt $attempt: Pod 'curl' not ready; waiting 5 seconds..."
    sleep 5
    attempt=$((attempt + 1))
  done

  # Final check after the polling loop; give up if the pod never became Ready.
  if ! check_pod_ready "curl" "default"; then
    echo "Error: Pod 'curl' is still not ready in namespace 'default' after 30 seconds."
    exit 1
  fi
fi

# Refuse to continue without both an IP and a PORT, then report the values in use.
[[ -n "$IP" ]] || {
  echo "Error: Unable to determine a valid IP from either Gateway or Envoy service."
  exit 1
}

[[ -n "$PORT" ]] || {
  echo "Error: Unable to determine a valid port from either Gateway or Envoy service."
  exit 1
}

echo "Using IP: $IP"
echo "Using PORT: $PORT"

# Send a completion request to /v1/completions, then sleep 5 seconds, repeating until
# TIME seconds have elapsed. Requests are issued from inside the "curl" pod when
# CURL_POD is "true", and from the local machine otherwise.
endpoint="$IP:$PORT/v1/completions"
payload='{"model": "food-review","prompt": "Write as if you were a critic: San Francisco","max_tokens": 100,"temperature": 0}'
deadline=$((SECONDS + TIME))
while [ $SECONDS -lt $deadline ]; do
  if [[ "$CURL_POD" == "true" ]]; then
    kubectl exec po/curl -- curl -i "$endpoint" \
      -H 'Content-Type: application/json' \
      -d "$payload"
  else
    curl -i "$endpoint" \
      -H 'Content-Type: application/json' \
      -d "$payload"
  fi
  sleep 5
done
# Run the Go e2e suite located in ./test/e2e/epp/ with verbose `go test` output (-v)
# and verbose Ginkgo output (-ginkgo.v).
echo "Found an active cluster. Running Go e2e tests in ./epp..."
go test ./test/e2e/epp/ -v -ginkgo.v
39 changes: 26 additions & 13 deletions test/e2e/epp/e2e_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,14 @@ const (
)

var (
ctx context.Context
ctx = context.Background()
cli client.Client
// Required for exec'ing in curl pod
kubeCli *kubernetes.Clientset
scheme = runtime.NewScheme()
cfg = config.GetConfigOrDie()
nsName string
kubeCli *kubernetes.Clientset
scheme = runtime.NewScheme()
cfg = config.GetConfigOrDie()
nsName string
e2eImage string
)

func TestAPIs(t *testing.T) {
Expand All @@ -108,6 +109,8 @@ var _ = ginkgo.BeforeSuite(func() {
if nsName == "" {
nsName = defaultNsName
}
e2eImage = os.Getenv("E2E_IMAGE")
gomega.Expect(e2eImage).NotTo(gomega.BeEmpty(), "E2E_IMAGE environment variable is not set")

ginkgo.By("Setting up the test suite")
setupSuite()
Expand All @@ -117,9 +120,12 @@ var _ = ginkgo.BeforeSuite(func() {
})

func setupInfra() {
// this function ensures ModelServer manifest path exists.
// run this before createNs to fail fast in case it doesn't.
modelServerManifestPath := readModelServerManifestPath()

createNamespace(cli, nsName)

modelServerManifestPath := readModelServerManifestPath()
modelServerManifestArray := getYamlsFromModelServerManifest(modelServerManifestPath)
if strings.Contains(modelServerManifestArray[0], "hf-token") {
createHfSecret(cli, modelServerSecretManifest)
Expand All @@ -134,7 +140,8 @@ func setupInfra() {
createClient(cli, clientManifest)
createEnvoy(cli, envoyManifest)
// Run this step last, as it requires additional time for the model server to become ready.
createModelServer(cli, modelServerManifestArray, modelServerManifestPath)
ginkgo.By("Creating model server resources from manifest: " + modelServerManifestPath)
createModelServer(cli, modelServerManifestArray)
}

var _ = ginkgo.AfterSuite(func() {
Expand All @@ -145,7 +152,6 @@ var _ = ginkgo.AfterSuite(func() {
// setupSuite initializes the test suite by setting up the Kubernetes client,
// loading required API schemes, and validating configuration.
func setupSuite() {
ctx = context.Background()
gomega.ExpectWithOffset(1, cfg).NotTo(gomega.BeNil())

err := clientgoscheme.AddToScheme(scheme)
Expand All @@ -167,6 +173,10 @@ func setupSuite() {
}

// cleanupResources removes the resources created for the test suite by calling
// testutils.DeleteClusterResources followed by testutils.DeleteNamespacedResources
// for nsName, asserting that each call succeeds.
//
// It is a no-op when the Kubernetes client was never initialized, which could
// happen if BeforeSuite had an error.
func cleanupResources() {
	if cli != nil {
		gomega.Expect(testutils.DeleteClusterResources(ctx, cli)).To(gomega.Succeed())
		gomega.Expect(testutils.DeleteNamespacedResources(ctx, cli, nsName)).To(gomega.Succeed())
	}
}
Expand Down Expand Up @@ -260,8 +270,7 @@ func createClient(k8sClient client.Client, filePath string) {
}

// createModelServer creates the model server resources used for testing from the given filePaths.
func createModelServer(k8sClient client.Client, modelServerManifestArray []string, deployPath string) {
ginkgo.By("Creating model server resources from manifest: " + deployPath)
func createModelServer(k8sClient client.Client, modelServerManifestArray []string) {
createObjsFromYaml(k8sClient, modelServerManifestArray)

// Wait for the deployment to exist.
Expand Down Expand Up @@ -332,10 +341,14 @@ func createEnvoy(k8sClient client.Client, filePath string) {
// createInferExt creates the inference extension resources used for testing from the given filePath.
func createInferExt(k8sClient client.Client, filePath string) {
inManifests := readYaml(filePath)
ginkgo.By("Replacing placeholder namespace with E2E_NS environment variable")
ginkgo.By("Replacing placeholders with environment variables")
outManifests := []string{}
for _, m := range inManifests {
outManifests = append(outManifests, strings.ReplaceAll(m, "$E2E_NS", nsName))
for _, manifest := range inManifests {
replacer := strings.NewReplacer(
"$E2E_NS", nsName,
"$E2E_IMAGE", e2eImage,
)
outManifests = append(outManifests, replacer.Replace(manifest))
}

ginkgo.By("Creating inference extension resources from manifest: " + filePath)
Expand Down
4 changes: 2 additions & 2 deletions test/testdata/inferencepool-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ spec:
terminationGracePeriodSeconds: 130
containers:
- name: epp
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
imagePullPolicy: Always
image: $E2E_IMAGE
imagePullPolicy: IfNotPresent
args:
- -poolName
- "vllm-llama3-8b-instruct"
Expand Down