Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion conformance/conformance.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"io/fs"
"os"
"testing"
"time"

"github.com/stretchr/testify/require"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
Expand Down Expand Up @@ -153,6 +154,8 @@ func DefaultOptions(t *testing.T) confsuite.ConformanceOptions {

baseManifestsValue := "resources/manifests/manifests.yaml"

config := confconfig.DefaultTimeoutConfig()
config.HTTPRouteMustHaveCondition = 300 * time.Second
opts := confsuite.ConformanceOptions{
Client: c,
ClientOptions: clientOptions,
Expand All @@ -163,7 +166,7 @@ func DefaultOptions(t *testing.T) confsuite.ConformanceOptions {
Debug: *confflags.ShowDebug,
CleanupBaseResources: *confflags.CleanupBaseResources,
SupportedFeatures: sets.New[features.FeatureName](),
TimeoutConfig: confconfig.DefaultTimeoutConfig(),
TimeoutConfig: config,
SkipTests: skipTests,
ExemptFeatures: exemptFeatures,
RunTest: *confflags.RunTest,
Expand Down
162 changes: 162 additions & 0 deletions conformance/tests/basic/gateway_following_epp_routing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package basic

import (
"testing"
"time"

"github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/types" // For standard condition types
"sigs.k8s.io/gateway-api/conformance/utils/suite"
"sigs.k8s.io/gateway-api/pkg/features" // For standard feature names

// Import the tests package to append to ConformanceTests
"sigs.k8s.io/gateway-api-inference-extension/conformance/tests"
"sigs.k8s.io/gateway-api-inference-extension/conformance/utils/config"
k8sutils "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/kubernetes"
trafficutils "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/traffic"
)

func init() {
// Register the GatwayFollowingEPPRouting test case with the conformance suite
// by appending it to tests.ConformanceTests.
tests.ConformanceTests = append(tests.ConformanceTests, GatwayFollowingEPPRouting)
}

// GatwayFollowingEPPRouting is the conformance test that checks the gateway
// follows the endpoint selection made by the EPP (endpoint picker).
//
// Every request carries the "test-epp-endpoint-selection" header, whose value
// is the endpoint the request is expected to be routed to. The subtests cover:
//   - a valid backend Pod IP (request must succeed),
//   - an IP that cannot belong to a real endpoint (429 expected),
//   - a request body without a model name (400 expected),
//   - a request body naming a model that does not exist (404 expected).
var GatwayFollowingEPPRouting = suite.ConformanceTest{
	ShortName:   "GatwayFollowingEPPRouting",
	Description: "Inference gateway should redirect traffic to an endpoints belonging to what EPP respond endpoints list",
	Manifests:   []string{"tests/basic/gateway_following_epp_routing.yaml"},
	Features: []features.FeatureName{
		features.FeatureName("SupportInferencePool"),
		features.SupportGateway,
	},
	Test: func(t *testing.T, s *suite.ConformanceTestSuite) {
		const (
			backendNS = "gateway-conformance-app-backend"
			gatewayNS = "gateway-conformance-infra"
			routeHost = "primary.example.com"
			routePath = "/primary-gateway-test"

			// Values shared by every request issued below.
			backendDeployment = "infra-backend-deployment"
			selectionHeader   = "test-epp-endpoint-selection"
			method            = "POST"
		)

		var (
			routeNN = types.NamespacedName{Namespace: backendNS, Name: "httproute-for-primary-gw"}
			gwNN    = types.NamespacedName{Namespace: gatewayNS, Name: "conformance-gateway"}
			poolNN  = types.NamespacedName{Namespace: backendNS, Name: "normal-gateway-pool"}
		)
		podLabels := map[string]string{"app": "infra-backend"}

		// Preconditions: the route and pool must be accepted before traffic is sent.
		k8sutils.HTTPRouteMustBeAcceptedAndResolved(t, s.Client, s.TimeoutConfig, routeNN, gwNN)
		k8sutils.InferencePoolMustBeAcceptedByParent(t, s.Client, poolNN)
		gatewayAddr := k8sutils.GetGatewayEndpoint(t, s.Client, s.TimeoutConfig, gwNN)

		podIP, err := k8sutils.GetOnePodIPWithLabel(t, s.Client, backendNS, podLabels)
		require.NoError(t, err, "Failed to get backend Pod IP address")

		readyWait := config.DefaultInferenceExtensionTimeoutConfig().WaitForHttpRouteAndInferencePoolReadyTimeout
		// TODO: replace this with a poll and check.
		t.Log("Waiting for the httpRoute and inferecePool ready to serve traffic.")
		time.Sleep(readyWait)

		// selectEndpoint builds the request headers that name the endpoint
		// the request should be routed to.
		selectEndpoint := func(ip string) map[string]string {
			return map[string]string{selectionHeader: ip}
		}

		validBody := `{
"model": "conformance-fake-model",
"prompt": "Write as if you were a critic: San Francisc"
}`

		t.Run("Gateway should route traffic to a valid endpoint specified by EPP", func(t *testing.T) {
			t.Logf("Sending request to %s with EPP header routing to valid IP %s", gatewayAddr, podIP)

			trafficutils.MakeRequestAndExpectSuccessV2(
				t, s.RoundTripper, s.TimeoutConfig,
				gatewayAddr, routeHost, routePath,
				backendDeployment, backendNS,
				selectEndpoint(podIP), validBody, method,
			)
		})

		t.Run("Gateway should route traffic specified by EPP even an invalidIP and should get response with error code 429", func(t *testing.T) {
			// Each octet is out of range, so this can never be a real endpoint.
			unroutableIP := "256.256.256.256"
			t.Logf("Sending request to %s with EPP header routing to invalid IP %s", gatewayAddr, unroutableIP)

			trafficutils.MakeRequestAndExpectTooManyRequest(
				t, s.RoundTripper, s.TimeoutConfig,
				gatewayAddr, routeHost, routePath,
				backendDeployment, backendNS,
				selectEndpoint(unroutableIP), validBody, method,
			)
		})

		t.Run("Gateway should reject request that is missing the model name and return 400 response", func(t *testing.T) {
			bodyWithoutModel := `{"prompt": "Write as if you were a critic: San Francisc"}`
			t.Logf("Sending request to %s with a malformed body (missing model)", gatewayAddr)

			trafficutils.MakeRequestAndExpectBadRequest(
				t, s.RoundTripper, s.TimeoutConfig,
				gatewayAddr, routeHost, routePath,
				backendDeployment, backendNS,
				selectEndpoint(podIP), bodyWithoutModel, method,
			)
		})

		t.Run("Gateway should reject request that is with a nonexist model name and return 404 response", func(t *testing.T) {
			bodyUnknownModel := `{
"model": "non-exist-model",
"prompt": "Write as if you were a critic: San Francisc"
}`
			t.Logf("Sending request to %s with a malformed body (nonexist model)", gatewayAddr)

			trafficutils.MakeRequestAndExpectNotFoundV2(
				t, s.RoundTripper, s.TimeoutConfig,
				gatewayAddr, routeHost, routePath,
				backendDeployment, backendNS,
				selectEndpoint(podIP), bodyUnknownModel, method,
			)
		})
	},
}
203 changes: 203 additions & 0 deletions conformance/tests/basic/gateway_following_epp_routing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
# conformance/tests/basic/gateway_following_epp_routing.yaml

# This manifest defines the initial resources for the
# gateway_following_epp_routing.go conformance test.

# --- Backend Deployment (using standard Gateway API echoserver) ---
# This Deployment provides Pods for the InferencePool to select.
# The Pod label `app: infra-backend` is the label the InferencePool selector in
# this manifest matches, and container port 3000 is the pool's targetPortNumber.
apiVersion: apps/v1
kind: Deployment
metadata:
name: infra-backend-deployment
namespace: gateway-conformance-app-backend
labels:
app: infra-backend
spec:
selector:
matchLabels:
app: infra-backend
template:
metadata:
labels:
app: infra-backend
spec:
containers:
- name: echoserver
image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
ports:
- containerPort: 3000
# Readiness is probed with an HTTP GET on the same port the container serves on.
readinessProbe:
httpGet:
path: /
port: 3000
initialDelaySeconds: 3
periodSeconds: 5
failureThreshold: 2
# Pod name, namespace and IP are passed to the container through fieldRef.
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
---
# --- InferenceModel Definition ---
# Associates the model name `conformance-fake-model` with the
# `normal-gateway-pool` InferencePool.
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferenceModel
metadata:
name: conformance-fake-model-server
namespace: gateway-conformance-app-backend
spec:
modelName: conformance-fake-model
criticality: Critical # Marked as critical to bypass the saturation check, since the model server is fake and doesn't have such metrics.
poolRef:
name: normal-gateway-pool
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't a Service + I don't think we need it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename the comment.

"I don't think we need it"
I think we still need the InferenceModel to direct the traffic to the correct POD hosting the model.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't think we're testing EPP behavior? Instead I thought that every request would tell EPP which Pod IP(s) to return, and then our test would validate EPP behavior.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right about the test's intent. The need for the InferenceModel is an implementation detail of the testingEPP.

Even though we control the final endpoint list with request headers, the testingEPP still requires the InferenceModel resource to:

  1. Discover the initial list of all possible pods from the referenced InferencePool. The request-header-based filter (used by the testingEPP) then selects from this list.
  2. Identify the modelName it is serving

So, while the test logic focuses on the gateway's reaction to the EPP's output, the testingEPP itself still needs the InferenceModel to know the universe of possible backends and the model being served

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we modify EPP so it doesn't need InferenceModel when running in this context?

cc @ahg-g @kfswain

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a little hard and needs more refactoring because currently the Pod list comes from the datastore (https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/pkg/epp/scheduling/scheduler.go#L108), and I believe it is updated by some reconciler logic. Hence there is no easy way to pass input parameters to the EPP as we did for the custom filter.

Also, currently the EPP request control flow tries to find an InferenceModel (https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/pkg/epp/requestcontrol/director.go#L94-L97), so without an InferenceModel the EPP will always return an error.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we can do that. @zetxqx will you cut an issue and assign it to me? Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks both,

---
# --- InferencePool Definition ---
# Selects Pods labelled `app: infra-backend` on port 3000 and references the
# `infra-backend-endpoint-picker` Service (defined in this manifest) as its extension.
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferencePool
metadata:
name: normal-gateway-pool
namespace: gateway-conformance-app-backend
spec:
selector:
app: "infra-backend"
targetPortNumber: 3000
extensionRef:
name: infra-backend-endpoint-picker
---
# --- HTTPRoute for Primary Gateway (conformance-gateway) ---
# Attaches to the `http` section of `conformance-gateway` in the
# `gateway-conformance-infra` namespace and sends requests for host
# `primary.example.com` with path prefix `/primary-gateway-test` to the
# `normal-gateway-pool` InferencePool.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
name: httproute-for-primary-gw
namespace: gateway-conformance-app-backend
spec:
parentRefs:
- group: gateway.networking.k8s.io
kind: Gateway
name: conformance-gateway
namespace: gateway-conformance-infra
sectionName: http
hostnames:
- "primary.example.com"
rules:
# The backend is an InferencePool rather than a Service.
- backendRefs:
- group: inference.networking.x-k8s.io
kind: InferencePool
name: normal-gateway-pool
matches:
- path:
type: PathPrefix
value: /primary-gateway-test
---
# --- Conformance EPP Service Definition ---
# Exposes the EPP Pods (label `app: infra-backend-epp`) on TCP port 9002.
# The Service name is the one referenced by the InferencePool's extensionRef.
apiVersion: v1
kind: Service
metadata:
name: infra-backend-endpoint-picker
namespace: gateway-conformance-app-backend
spec:
selector:
app: infra-backend-epp
ports:
# 9002 is the value passed to the EPP container's -grpcPort argument.
- protocol: TCP
port: 9002
targetPort: 9002
appProtocol: http2
type: ClusterIP
---
# --- Conformance EPP Deployment ---
# Runs a single EPP replica configured for the `normal-gateway-pool`
# InferencePool in the `gateway-conformance-app-backend` namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
name: infra-backend-epp
namespace: gateway-conformance-app-backend
labels:
app: infra-backend-epp
spec:
replicas: 1
selector:
matchLabels:
app: infra-backend-epp
template:
metadata:
labels:
app: infra-backend-epp
spec:
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
terminationGracePeriodSeconds: 130
containers:
- name: epp
# NOTE(review): `:main` is a moving tag and is pulled on every start (imagePullPolicy: Always),
# so the EPP build under test can change between runs.
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
imagePullPolicy: Always
args:
# Pool the EPP serves; matches the InferencePool defined in this manifest.
- -poolName
- "normal-gateway-pool"
- -poolNamespace
- "gateway-conformance-app-backend"
- -v
- "4"
- --zap-encoder
- "json"
# 9002 is the port exposed by the infra-backend-endpoint-picker Service.
- -grpcPort
- "9002"
# 9003 is the port targeted by the gRPC liveness and readiness probes below.
- -grpcHealthPort
- "9003"
env:
- name: USE_STREAMING
value: "true"
# Presumably enables endpoint selection driven by a request header, which the
# conformance test relies on — TODO confirm against the EPP implementation.
- name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
value: "true"
ports:
- containerPort: 9002
- containerPort: 9003
- name: metrics
containerPort: 9090
livenessProbe:
grpc:
port: 9003
service: inference-extension
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
grpc:
port: 9003
service: inference-extension
initialDelaySeconds: 5
periodSeconds: 10
---
# --- Conformance EPP Required Role and RoleBindings ---
# Grants read-only access (get, list, watch) to InferenceModels, InferencePools
# and Pods in the `gateway-conformance-app-backend` namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: inference-model-reader
namespace: gateway-conformance-app-backend
rules:
- apiGroups: ["inference.networking.x-k8s.io"]
resources: ["inferencemodels", "inferencepools"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list", "watch"]
---
# Binds the `inference-model-reader` Role to the `default` ServiceAccount of the
# `gateway-conformance-app-backend` namespace.
# NOTE(review): the EPP Deployment in this manifest sets no serviceAccountName, so it
# presumably runs as this `default` account — confirm.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: epp-to-inference-model-reader
namespace: gateway-conformance-app-backend
subjects:
- kind: ServiceAccount
name: default
namespace: gateway-conformance-app-backend
roleRef:
kind: Role
name: inference-model-reader
apiGroup: rbac.authorization.k8s.io
Loading