-
Notifications
You must be signed in to change notification settings - Fork 182
feat(conformance): Add EPP conformance test for Gateway routing #961
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
8d6fb58
f879558
9f3e382
b83290c
a680c76
ff3086a
5b725d0
838b535
f313e58
bf95ea9
215b7ce
417cb44
be9c779
3809e50
a2ba595
d99f096
5b4a86c
9c7788f
2627f00
72c4ec1
fa47b15
1a3daae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
/* | ||
Copyright 2025 The Kubernetes Authors. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package basic | ||
|
||
import ( | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/require" | ||
"k8s.io/apimachinery/pkg/types" // For standard condition types | ||
"sigs.k8s.io/gateway-api/conformance/utils/suite" | ||
"sigs.k8s.io/gateway-api/pkg/features" // For standard feature names | ||
|
||
// Import the tests package to append to ConformanceTests | ||
"sigs.k8s.io/gateway-api-inference-extension/conformance/tests" | ||
"sigs.k8s.io/gateway-api-inference-extension/conformance/utils/config" | ||
k8sutils "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/kubernetes" | ||
trafficutils "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/traffic" | ||
) | ||
|
||
func init() { | ||
// Register the GatwayFollowingEPPRouting test case by appending it to | ||
// tests.ConformanceTests, the list this package contributes its test cases to. | ||
tests.ConformanceTests = append(tests.ConformanceTests, GatwayFollowingEPPRouting) | ||
} | ||
|
||
// GatwayFollowingEPPRouting is the conformance test case checking that the gateway | ||
// routes requests to the endpoint selected by the endpoint picker (EPP). | ||
// Every subtest sends a POST request carrying a "test-epp-endpoint-selection" header: | ||
//   - a backend Pod IP in the header, expecting success; | ||
//   - an invalid IP in the header, expecting 429 (per the subtest name); | ||
//   - a body without a model name, expecting 400 (per the subtest name); | ||
//   - a body naming a non-existent model, expecting 404 (per the subtest name). | ||
var GatwayFollowingEPPRouting = suite.ConformanceTest{ | ||
ShortName: "GatwayFollowingEPPRouting", | ||
Description: "Inference gateway should redirect traffic to an endpoints belonging to what EPP respond endpoints list", | ||
zetxqx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
Manifests: []string{"tests/basic/gateway_following_epp_routing.yaml"}, | ||
Features: []features.FeatureName{ | ||
features.FeatureName("SupportInferencePool"), | ||
features.SupportGateway, | ||
}, | ||
Test: func(t *testing.T, s *suite.ConformanceTestSuite) { | ||
const ( | ||
appBackendNamespace = "gateway-conformance-app-backend" | ||
infraNamespace = "gateway-conformance-infra" | ||
hostname = "primary.example.com" | ||
path = "/primary-gateway-test" | ||
) | ||
|
||
httpRouteNN := types.NamespacedName{Name: "httproute-for-primary-gw", Namespace: appBackendNamespace} | ||
gatewayNN := types.NamespacedName{Name: "conformance-gateway", Namespace: infraNamespace} | ||
poolNN := types.NamespacedName{Name: "normal-gateway-pool", Namespace: appBackendNamespace} | ||
backendPodLabels := map[string]string{"app": "infra-backend"} | ||
|
||
// Preconditions: check the route and pool status, then look up the gateway address. | ||
// NOTE(review): the k8sutils helpers are defined elsewhere; presumably they fail the | ||
// test when the expected condition is not reached — confirm. | ||
k8sutils.HTTPRouteMustBeAcceptedAndResolved(t, s.Client, s.TimeoutConfig, httpRouteNN, gatewayNN) | ||
k8sutils.InferencePoolMustBeAcceptedByParent(t, s.Client, poolNN) | ||
gwAddr := k8sutils.GetGatewayEndpoint(t, s.Client, s.TimeoutConfig, gatewayNN) | ||
|
||
// Look up the IP of one Pod labelled app=infra-backend; it is used as the EPP | ||
// endpoint-selection header value in the subtests below. | ||
backendPodIP, err := k8sutils.GetOnePodIPWithLabel(t, s.Client, appBackendNamespace, backendPodLabels) | ||
zetxqx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
require.NoError(t, err, "Failed to get backend Pod IP address") | ||
|
||
inferenceTimeoutConfig := config.DefaultInferenceExtensionTimeoutConfig() | ||
// TODO: replace this with a poll and check. | ||
zetxqx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
t.Log("Waiting for the httpRoute and inferecePool ready to serve traffic.") | ||
// Fixed sleep rather than a readiness check (see the TODO above). | ||
time.Sleep(inferenceTimeoutConfig.WaitForHttpRouteAndInferencePoolReadyTimeout) | ||
|
||
correctRequestBody := `{ | ||
"model": "conformance-fake-model", | ||
"prompt": "Write as if you were a critic: San Francisc" | ||
zetxqx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
}` | ||
|
||
t.Run("Gateway should route traffic to a valid endpoint specified by EPP", func(t *testing.T) { | ||
t.Logf("Sending request to %s with EPP header routing to valid IP %s", gwAddr, backendPodIP) | ||
// Header naming the endpoint the EPP should select. | ||
// NOTE(review): presumably read by the header-based scheduler enabled in the | ||
// test manifest (ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING) — confirm. | ||
eppHeader := map[string]string{"test-epp-endpoint-selection": backendPodIP} | ||
|
||
trafficutils.MakeRequestAndExpectSuccessV2( | ||
t, | ||
s.RoundTripper, | ||
s.TimeoutConfig, | ||
gwAddr, | ||
hostname, | ||
path, | ||
"infra-backend-deployment", // This might be better as a constant if used often | ||
appBackendNamespace, | ||
eppHeader, | ||
correctRequestBody, | ||
"POST", | ||
) | ||
}) | ||
|
||
t.Run("Gateway should route traffic specified by EPP even an invalidIP and should get response with error code 429", func(t *testing.T) { | ||
invalidIP := "256.256.256.256" // An IP that cannot be a real endpoint | ||
t.Logf("Sending request to %s with EPP header routing to invalid IP %s", gwAddr, invalidIP) | ||
eppHeader := map[string]string{"test-epp-endpoint-selection": invalidIP} | ||
|
||
trafficutils.MakeRequestAndExpectTooManyRequest( | ||
t, | ||
s.RoundTripper, | ||
s.TimeoutConfig, | ||
gwAddr, | ||
hostname, | ||
path, | ||
"infra-backend-deployment", | ||
appBackendNamespace, | ||
eppHeader, | ||
correctRequestBody, | ||
"POST", | ||
) | ||
}) | ||
|
||
t.Run("Gateway should reject request that is missing the model name and return 400 response", func(t *testing.T) { | ||
zetxqx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
requestBodyWithoutModel := `{"prompt": "Write as if you were a critic: San Francisc"}` | ||
eppHeader := map[string]string{"test-epp-endpoint-selection": backendPodIP} | ||
t.Logf("Sending request to %s with a malformed body (missing model)", gwAddr) | ||
|
||
trafficutils.MakeRequestAndExpectBadRequest( | ||
t, | ||
s.RoundTripper, | ||
s.TimeoutConfig, | ||
gwAddr, | ||
hostname, | ||
path, | ||
"infra-backend-deployment", | ||
appBackendNamespace, | ||
eppHeader, | ||
requestBodyWithoutModel, | ||
"POST", | ||
) | ||
}) | ||
|
||
t.Run("Gateway should reject request that is with a nonexist model name and return 404 response", func(t *testing.T) { | ||
zetxqx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
requestBodyNonExistModel := `{ | ||
"model": "non-exist-model", | ||
"prompt": "Write as if you were a critic: San Francisc" | ||
}` | ||
eppHeader := map[string]string{"test-epp-endpoint-selection": backendPodIP} | ||
t.Logf("Sending request to %s with a malformed body (nonexist model)", gwAddr) | ||
|
||
trafficutils.MakeRequestAndExpectNotFoundV2( | ||
t, | ||
s.RoundTripper, | ||
s.TimeoutConfig, | ||
gwAddr, | ||
hostname, | ||
path, | ||
"infra-backend-deployment", | ||
appBackendNamespace, | ||
eppHeader, | ||
requestBodyNonExistModel, | ||
"POST", | ||
) | ||
}) | ||
}, | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
# conformance/tests/basic/gateway_following_epp_routing.yaml | ||
|
||
# This manifest defines the initial resources for the | ||
# gateway_following_epp_routing.go conformance test. | ||
|
||
# --- Backend Deployment (using standard Gateway API echoserver) --- | ||
# This Deployment provides Pods for the InferencePool to select. | ||
zetxqx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
apiVersion: apps/v1 | ||
kind: Deployment | ||
metadata: | ||
name: infra-backend-deployment | ||
namespace: gateway-conformance-app-backend | ||
labels: | ||
app: infra-backend | ||
spec: | ||
selector: | ||
matchLabels: | ||
app: infra-backend | ||
template: | ||
metadata: | ||
labels: | ||
app: infra-backend | ||
spec: | ||
containers: | ||
- name: echoserver | ||
image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd | ||
ports: | ||
- containerPort: 3000 | ||
readinessProbe: | ||
httpGet: | ||
path: / | ||
port: 3000 | ||
initialDelaySeconds: 3 | ||
periodSeconds: 5 | ||
failureThreshold: 2 | ||
env: | ||
- name: POD_NAME | ||
valueFrom: | ||
fieldRef: | ||
fieldPath: metadata.name | ||
- name: NAMESPACE | ||
valueFrom: | ||
fieldRef: | ||
fieldPath: metadata.namespace | ||
- name: POD_IP | ||
valueFrom: | ||
fieldRef: | ||
fieldPath: status.podIP | ||
--- | ||
# --- InferenceModel Definition --- | ||
# Maps the model name conformance-fake-model to the normal-gateway-pool InferencePool. | ||
apiVersion: inference.networking.x-k8s.io/v1alpha2 | ||
kind: InferenceModel | ||
metadata: | ||
name: conformance-fake-model-server | ||
namespace: gateway-conformance-app-backend | ||
spec: | ||
modelName: conformance-fake-model | ||
criticality: Critical # Mark it as critical to bypass the saturation check, since the model server is fake and doesn't have such metrics. | ||
poolRef: | ||
name: normal-gateway-pool | ||
|
||
--- | ||
# --- InferencePool Definition --- | ||
apiVersion: inference.networking.x-k8s.io/v1alpha2 | ||
kind: InferencePool | ||
metadata: | ||
name: normal-gateway-pool | ||
namespace: gateway-conformance-app-backend | ||
spec: | ||
selector: | ||
app: "infra-backend" | ||
targetPortNumber: 3000 | ||
extensionRef: | ||
name: infra-backend-endpoint-picker | ||
--- | ||
# --- HTTPRoute for Primary Gateway (conformance-gateway) --- | ||
apiVersion: gateway.networking.k8s.io/v1 | ||
kind: HTTPRoute | ||
metadata: | ||
name: httproute-for-primary-gw | ||
namespace: gateway-conformance-app-backend | ||
spec: | ||
parentRefs: | ||
- group: gateway.networking.k8s.io | ||
kind: Gateway | ||
name: conformance-gateway | ||
namespace: gateway-conformance-infra | ||
sectionName: http | ||
hostnames: | ||
- "primary.example.com" | ||
rules: | ||
- backendRefs: | ||
- group: inference.networking.x-k8s.io | ||
kind: InferencePool | ||
name: normal-gateway-pool | ||
matches: | ||
- path: | ||
type: PathPrefix | ||
value: /primary-gateway-test | ||
--- | ||
# --- Conformance EPP service Definition --- | ||
zetxqx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
apiVersion: v1 | ||
kind: Service | ||
metadata: | ||
name: infra-backend-endpoint-picker | ||
namespace: gateway-conformance-app-backend | ||
spec: | ||
selector: | ||
app: infra-backend-epp | ||
ports: | ||
- protocol: TCP | ||
port: 9002 | ||
targetPort: 9002 | ||
appProtocol: http2 | ||
type: ClusterIP | ||
--- | ||
# --- Conformance EPP Deployment --- | ||
zetxqx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
apiVersion: apps/v1 | ||
kind: Deployment | ||
metadata: | ||
name: infra-backend-epp | ||
namespace: gateway-conformance-app-backend | ||
labels: | ||
app: infra-backend-epp | ||
spec: | ||
replicas: 1 | ||
selector: | ||
matchLabels: | ||
app: infra-backend-epp | ||
template: | ||
metadata: | ||
labels: | ||
app: infra-backend-epp | ||
spec: | ||
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool | ||
terminationGracePeriodSeconds: 130 | ||
containers: | ||
- name: epp | ||
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main | ||
imagePullPolicy: Always | ||
args: | ||
- -poolName | ||
- "normal-gateway-pool" | ||
- -poolNamespace | ||
- "gateway-conformance-app-backend" | ||
- -v | ||
- "4" | ||
- --zap-encoder | ||
- "json" | ||
- -grpcPort | ||
- "9002" | ||
- -grpcHealthPort | ||
- "9003" | ||
env: | ||
- name: USE_STREAMING | ||
value: "true" | ||
- name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test. | ||
value: "true" | ||
ports: | ||
- containerPort: 9002 | ||
- containerPort: 9003 | ||
- name: metrics | ||
containerPort: 9090 | ||
livenessProbe: | ||
grpc: | ||
port: 9003 | ||
service: inference-extension | ||
initialDelaySeconds: 5 | ||
periodSeconds: 10 | ||
readinessProbe: | ||
grpc: | ||
port: 9003 | ||
service: inference-extension | ||
initialDelaySeconds: 5 | ||
periodSeconds: 10 | ||
--- | ||
# --- Conformance EPP Required Role and RoleBinding --- | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
kind: Role | ||
metadata: | ||
name: inference-model-reader | ||
namespace: gateway-conformance-app-backend | ||
rules: | ||
- apiGroups: ["inference.networking.x-k8s.io"] | ||
resources: ["inferencemodels", "inferencepools"] | ||
verbs: ["get", "list", "watch"] | ||
- apiGroups: [""] | ||
resources: ["pods"] | ||
verbs: ["get", "list", "watch"] | ||
--- | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
kind: RoleBinding | ||
metadata: | ||
name: epp-to-inference-model-reader | ||
namespace: gateway-conformance-app-backend | ||
subjects: | ||
- kind: ServiceAccount | ||
name: default | ||
namespace: gateway-conformance-app-backend | ||
roleRef: | ||
kind: Role | ||
name: inference-model-reader | ||
apiGroup: rbac.authorization.k8s.io | ||
zetxqx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
Uh oh!
There was an error while loading. Please reload this page.