Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions config/manifests/inferencepool-resources.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Note: If you change this file, please also change the file used for e2e tests!
#
# https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/test/testdata/inferencepool-e2e.yaml
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferencePool
metadata:
Expand Down
5 changes: 4 additions & 1 deletion config/manifests/vllm/cpu-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,8 @@ data:
ensureExist:
models:
- base-model: Qwen/Qwen2.5-1.5B
id: food-review-1
id: food-review
source: SriSanth2345/Qwen-1.5B-Tweet-Generations
- base-model: Qwen/Qwen2.5-1.5B
id: cad-fabricator
source: SriSanth2345/Qwen-1.5B-Tweet-Generations
7 changes: 7 additions & 0 deletions test/e2e/epp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ Follow these steps to run the end-to-end tests:
export HF_TOKEN=<MY_HF_TOKEN>
```

1. **(Optional): Set the test namespace**: By default, the e2e test creates resources in the `inf-ext-e2e` namespace.
If you would like to change this namespace, set the following environment variable:

```sh
export E2E_NS=<MY_NS>
```

1. **Run the Tests**: Run the `test-e2e` target:

```sh
Expand Down
46 changes: 40 additions & 6 deletions test/e2e/epp/e2e_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
Expand All @@ -55,9 +56,8 @@ const (
defaultInterval = time.Millisecond * 250
// defaultCurlInterval is the default interval to run the test curl command.
defaultCurlInterval = time.Second * 5
// nsName is the name of the Namespace used for tests.
// TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed
nsName = "default"
// defaultNsName is the default name of the Namespace used for tests. Can override using the E2E_NS environment variable.
defaultNsName = "inf-ext-e2e"
// modelServerName is the name of the model server test resources.
modelServerName = "vllm-llama3-8b-instruct"
// modelName is the test model name.
Expand All @@ -77,7 +77,7 @@ const (
// inferModelManifest is the manifest for the inference model CRD.
inferModelManifest = "../../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml"
// inferExtManifest is the manifest for the inference extension test resources.
inferExtManifest = "../../../config/manifests/inferencepool-resources.yaml"
inferExtManifest = "../../testdata/inferencepool-e2e.yaml"
// envoyManifest is the manifest for the envoy proxy test resources.
envoyManifest = "../../testdata/envoy.yaml"
// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.
Expand All @@ -91,6 +91,7 @@ var (
kubeCli *kubernetes.Clientset
scheme = runtime.NewScheme()
cfg = config.GetConfigOrDie()
nsName string
)

func TestAPIs(t *testing.T) {
Expand All @@ -101,6 +102,11 @@ func TestAPIs(t *testing.T) {
}

var _ = ginkgo.BeforeSuite(func() {
nsName = os.Getenv("E2E_NS")
if nsName == "" {
nsName = defaultNsName
}

ginkgo.By("Setting up the test suite")
setupSuite()

Expand All @@ -109,6 +115,8 @@ var _ = ginkgo.BeforeSuite(func() {
})

func setupInfra() {
createNamespace(cli, nsName)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add the namespace deletion to AfterSuite cleanup?
all resources that are created in the test should be deleted at the end of it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


modelServerManifestPath := readModelServerManifestPath()
modelServerManifestArray := getYamlsFromModelServerManifest(modelServerManifestPath)
if strings.Contains(modelServerManifestArray[0], "hf-token") {
Expand All @@ -118,6 +126,7 @@ func setupInfra() {
"inferencepools.inference.networking.x-k8s.io": inferPoolManifest,
"inferencemodels.inference.networking.x-k8s.io": inferModelManifest,
}

createCRDs(cli, crds)
createInferExt(cli, inferExtManifest)
createClient(cli, clientManifest)
Expand Down Expand Up @@ -182,6 +191,17 @@ var (
curlInterval = defaultCurlInterval
)

// createNamespace creates a Namespace named ns through k8sClient and fails
// the running spec if the create call returns an error.
func createNamespace(k8sClient client.Client, ns string) {
	ginkgo.By("Creating e2e namespace: " + ns)

	// Only the name is set; every other field is left at its zero value.
	namespace := &corev1.Namespace{}
	namespace.Name = ns

	createErr := k8sClient.Create(ctx, namespace)
	gomega.Expect(createErr).NotTo(gomega.HaveOccurred(), "Failed to create e2e test namespace")
}

// namespaceExists ensures that a specified namespace exists and is ready for use.
func namespaceExists(k8sClient client.Client, ns string) {
ginkgo.By("Ensuring namespace exists: " + ns)
Expand Down Expand Up @@ -276,8 +296,15 @@ func createHfSecret(k8sClient client.Client, secretPath string) {

// createEnvoy creates the envoy proxy resources used for testing from the given filePath.
func createEnvoy(k8sClient client.Client, filePath string) {
inManifests := readYaml(filePath)
ginkgo.By("Replacing placeholder namespace with E2E_NS environment variable")
outManifests := []string{}
for _, m := range inManifests {
outManifests = append(outManifests, strings.ReplaceAll(m, "$E2E_NS", nsName))
}

ginkgo.By("Creating envoy proxy resources from manifest: " + filePath)
applyYAMLFile(k8sClient, filePath)
createObjsFromYaml(k8sClient, outManifests)

// Wait for the configmap to exist before proceeding with test.
cfgMap := &corev1.ConfigMap{}
Expand All @@ -302,8 +329,15 @@ func createEnvoy(k8sClient client.Client, filePath string) {

// createInferExt creates the inference extension resources used for testing from the given filePath.
func createInferExt(k8sClient client.Client, filePath string) {
inManifests := readYaml(filePath)
ginkgo.By("Replacing placeholder namespace with E2E_NS environment variable")
outManifests := []string{}
for _, m := range inManifests {
outManifests = append(outManifests, strings.ReplaceAll(m, "$E2E_NS", nsName))
}

ginkgo.By("Creating inference extension resources from manifest: " + filePath)
applyYAMLFile(k8sClient, filePath)
createObjsFromYaml(k8sClient, outManifests)

// Wait for the clusterrole to exist.
testutils.EventuallyExists(ctx, func() error {
Expand Down
6 changes: 3 additions & 3 deletions test/testdata/envoy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ data:
grpc_service:
envoy_grpc:
cluster_name: ext_proc
authority: vllm-llama3-8b-instruct-epp.default:9002
authority: vllm-llama3-8b-instruct-epp.$E2E_NS:9002
timeout: 10s
processing_mode:
request_header_mode: SEND
Expand Down Expand Up @@ -195,7 +195,7 @@ data:
- endpoint:
address:
socket_address:
address: vllm-llama3-8b-instruct-epp.default
address: vllm-llama3-8b-instruct-epp.$E2E_NS
port_value: 9002
health_status: HEALTHY
load_balancing_weight: 1
Expand Down Expand Up @@ -225,7 +225,7 @@ spec:
image: docker.io/envoyproxy/envoy:distroless-v1.33.2
args:
- "--service-cluster"
- "default/inference-gateway"
- "$E2E_NS/inference-gateway"
- "--service-node"
- "$(ENVOY_POD_NAME)"
- "--log-level"
Expand Down
126 changes: 126 additions & 0 deletions test/testdata/inferencepool-e2e.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
apiVersion: inference.networking.x-k8s.io/v1alpha2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a duplication of the existing inferencepool-resources.yaml, excluding the namespace field.
I would prefer if we can remove the namespace field from all resources in the inferencepool-resources.yaml file and specify the namespace in the e2e test (as env var as specified in other comment).

there is a big value in testing e2e our "Getting Started" files, so we are self testing the public documentation.
I would avoid having duplications as much as possible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would you suggest getting the env var parsed in the YAML file?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose I could use a similar approach to the one you described above. However, that might complicate the getting started guide? How would the user of the guide make sure the env var was parsed in the file?

Copy link
Contributor

@nirrozenbaum nirrozenbaum Apr 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the e2e test, a similar approach would work. For the getting started guide, I’m not sure we have agreement from the other maintainers about being able to choose the namespace (I’m in favor of it).
If there is agreement, you can use the envsubst command to replace each env var with its value.

for example -
envsubst < myfile.yaml | kubectl apply -f -

This discussion is relevant only for the subject ServiceAccount in the ClusterRoleBinding, as the namespace in the Service and Deployment resources can simply be removed, and then you can decide the namespace during the e2e test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we tackle this in a follow-up then? If we change the getting started file in this PR, it's going to expand the scope considerably, as we would then need to update the guide with the env var substitution command.

cc @danehans

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we change the getting started file in this PR

+1 on not changing the getting started guide in this PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is a big value in testing e2e our "Getting Started" files, so we are self testing the public documentation.

I agree with this point. We may need to consider 2 different e2e tests, 1 for testing the quickstart and another that is more programmatic to support various inference use cases. Maybe we track this in a separate issue?

cc: @ahg-g @kfswain

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer to move away from yaml files and just create those objects in code. The envoy one is complex, so I don't mind it, but other than that I think it is more manageable to have the test create those objects in code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, well, creating the pool requires also creating all the epp and the rbac resources, so it is also complex...

kind: InferencePool
metadata:
labels:
name: vllm-llama3-8b-instruct
spec:
targetPortNumber: 8000
selector:
app: vllm-llama3-8b-instruct
extensionRef:
name: vllm-llama3-8b-instruct-epp
namespace: $E2E_NS
---
apiVersion: v1
kind: Service
metadata:
name: vllm-llama3-8b-instruct-epp
namespace: $E2E_NS
spec:
selector:
app: vllm-llama3-8b-instruct-epp
ports:
- protocol: TCP
port: 9002
targetPort: 9002
appProtocol: http2
type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: vllm-llama3-8b-instruct-epp
namespace: $E2E_NS
labels:
app: vllm-llama3-8b-instruct-epp
spec:
replicas: 1
selector:
matchLabels:
app: vllm-llama3-8b-instruct-epp
template:
metadata:
labels:
app: vllm-llama3-8b-instruct-epp
spec:
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
terminationGracePeriodSeconds: 130
containers:
- name: epp
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
imagePullPolicy: Always
args:
- -poolName
- "vllm-llama3-8b-instruct"
- -poolNamespace
- "$E2E_NS"
- -v
- "4"
- --zap-encoder
- "json"
- -grpcPort
- "9002"
- -grpcHealthPort
- "9003"
env:
- name: USE_STREAMING
value: "true"
ports:
- containerPort: 9002
- containerPort: 9003
- name: metrics
containerPort: 9090
livenessProbe:
grpc:
port: 9003
service: inference-extension
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
grpc:
port: 9003
service: inference-extension
initialDelaySeconds: 5
periodSeconds: 10
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: pod-read
rules:
- apiGroups: ["inference.networking.x-k8s.io"]
resources: ["inferencemodels"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "watch", "list"]
- apiGroups: ["inference.networking.x-k8s.io"]
resources: ["inferencepools"]
verbs: ["get", "watch", "list"]
- apiGroups: ["discovery.k8s.io"]
resources: ["endpointslices"]
verbs: ["get", "watch", "list"]
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: pod-read-binding
subjects:
- kind: ServiceAccount
name: default
namespace: $E2E_NS
roleRef:
kind: ClusterRole
name: pod-read