Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions conformance/conformance.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ import (

// Constants for the shared Gateway
const (
SharedGatewayName = "conformance-gateway" // Name of the Gateway in manifests.yaml
SharedGatewayNamespace = "gateway-conformance-infra" // Namespace of the Gateway
SharedGatewayName = "conformance-primary-gateway" // Name of the Gateway in manifests.yaml
SharedGatewayNamespace = "gateway-conformance-infra" // Namespace of the Gateway
)

// GatewayLayerProfileName defines the name for the conformance profile that tests
Expand All @@ -88,6 +88,7 @@ const SupportInferencePool features.FeatureName = "SupportInferencePool"
// of the "Gateway" profile for the Inference Extension MUST support.
var InferenceCoreFeatures = sets.New(
features.SupportGateway, // This is needed to ensure manifest gets applied during setup.
features.SupportHTTPRoute,
SupportInferencePool,
)

Expand Down
250 changes: 226 additions & 24 deletions conformance/resources/manifests/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,12 @@ metadata:
labels:
gateway-conformance: backend
---
apiVersion: v1
kind: Namespace
metadata:
name: gateway-conformance-web-backend
labels:
gateway-conformance: web-backend
---
# A basic Gateway resource that allows HTTPRoutes from the same namespace.
# Tests can use this as a parent reference for routes that target InferencePools.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
name: conformance-gateway
name: conformance-primary-gateway
namespace: gateway-conformance-infra
spec:
gatewayClassName: "{GATEWAY_CLASS_NAME}"
Expand Down Expand Up @@ -58,30 +53,32 @@ spec:
allowedRoutes:
namespaces:
from: All

### The following defines the essential resources for the gateway conformance test.
### All resources are created in the 'gateway-conformance-app-backend' namespace.
---
# Deploys a mock backend service to act as a model server.
apiVersion: apps/v1
kind: Deployment
metadata:
name: inference-model-1
name: primary-inference-model-server-deployment
namespace: gateway-conformance-app-backend
labels:
app: inference-model-1
app: primary-inference-model-server
spec:
replicas: 1
selector:
matchLabels:
app: inference-model-1
app: primary-inference-model-server
template:
metadata:
labels:
app: inference-model-1
app: primary-inference-model-server
spec:
containers:
- name: echo-basic-1
- name: echoserver
image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
ports:
- name: http
containerPort: 3000
- containerPort: 3000
readinessProbe:
httpGet:
path: /
Expand All @@ -103,29 +100,28 @@ spec:
fieldRef:
fieldPath: status.podIP
---
# Deploys a secondary mock backend service to act as a model server.
apiVersion: apps/v1
kind: Deployment
metadata:
name: inference-model-2
name: secondary-inference-model-server-deployment
namespace: gateway-conformance-app-backend
labels:
app: inference-model-2
app: secondary-inference-model-server
spec:
replicas: 1
selector:
matchLabels:
app: inference-model-2
app: secondary-inference-model-server
template:
metadata:
labels:
app: inference-model-2
app: secondary-inference-model-server
spec:
containers:
- name: echo-basic-2
- name: echoserver
image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd
ports:
- name: http
containerPort: 3000
- containerPort: 3000
readinessProbe:
httpGet:
path: /
Expand All @@ -146,3 +142,209 @@ spec:
valueFrom:
fieldRef:
fieldPath: status.podIP
---
# --- Primary InferencePool Definition ---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferencePool
metadata:
name: primary-inference-pool
namespace: gateway-conformance-app-backend
spec:
selector:
app: primary-inference-model-server
targetPortNumber: 3000
extensionRef:
name: primary-endpoint-picker-svc
---
# --- Primary Conformance EPP service Definition ---
apiVersion: v1
kind: Service
metadata:
name: primary-endpoint-picker-svc
namespace: gateway-conformance-app-backend
spec:
selector:
app: primary-app-backend-epp
ports:
- protocol: TCP
port: 9002
targetPort: 9002
appProtocol: http2
type: ClusterIP
---
# --- Primary Conformance EPP Deployment ---
apiVersion: apps/v1
kind: Deployment
metadata:
name: primary-app-endpoint-picker
namespace: gateway-conformance-app-backend
labels:
app: primary-app-backend-epp
spec:
replicas: 1
selector:
matchLabels:
app: primary-app-backend-epp
template:
metadata:
labels:
app: primary-app-backend-epp
spec:
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
terminationGracePeriodSeconds: 130
containers:
- name: epp
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
imagePullPolicy: Always
args:
- -poolName
- "primary-inference-pool"
- -poolNamespace
- "gateway-conformance-app-backend"
- -v
- "4"
- --zap-encoder
- "json"
- -grpcPort
- "9002"
- -grpcHealthPort
- "9003"
env:
- name: USE_STREAMING
value: "true"
- name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
value: "true"
ports:
- containerPort: 9002
- containerPort: 9003
- name: metrics
containerPort: 9090
livenessProbe:
grpc:
port: 9003
service: inference-extension
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
grpc:
port: 9003
service: inference-extension
initialDelaySeconds: 5
periodSeconds: 10
---
# --- Secondary InferencePool Definition ---
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferencePool
metadata:
name: secondary-inference-pool
namespace: gateway-conformance-app-backend
spec:
selector:
app: secondary-inference-model-server
targetPortNumber: 3000
extensionRef:
name: secondary-endpoint-picker-svc
---
# --- Secondary Conformance EPP service Definition ---
apiVersion: v1
kind: Service
metadata:
name: secondary-endpoint-picker-svc
namespace: gateway-conformance-app-backend
spec:
selector:
app: secondary-app-backend-epp
ports:
- protocol: TCP
port: 9002
targetPort: 9002
appProtocol: http2
type: ClusterIP
---
# --- Secondary Conformance EPP Deployment ---
apiVersion: apps/v1
kind: Deployment
metadata:
name: secondary-app-endpoint-picker
namespace: gateway-conformance-app-backend
labels:
app: secondary-app-backend-epp
spec:
replicas: 1
selector:
matchLabels:
app: secondary-app-backend-epp
template:
metadata:
labels:
app: secondary-app-backend-epp
spec:
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
terminationGracePeriodSeconds: 130
containers:
- name: epp
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
imagePullPolicy: Always
args:
- -poolName
- "secondary-inference-pool"
- -poolNamespace
- "gateway-conformance-app-backend"
- -v
- "4"
- --zap-encoder
- "json"
- -grpcPort
- "9002"
- -grpcHealthPort
- "9003"
env:
- name: USE_STREAMING
value: "true"
- name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING # Used for conformance test.
value: "true"
ports:
- containerPort: 9002
- containerPort: 9003
- name: metrics
containerPort: 9090
livenessProbe:
grpc:
port: 9003
service: inference-extension
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
grpc:
port: 9003
service: inference-extension
initialDelaySeconds: 5
periodSeconds: 10
---
# --- Required Role and RoleBinding for Conformance Test for EPP ---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: inference-model-reader
namespace: gateway-conformance-app-backend
rules:
- apiGroups: ["inference.networking.x-k8s.io"]
resources: ["inferencemodels", "inferencepools"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: epp-to-inference-model-reader
namespace: gateway-conformance-app-backend
subjects:
- kind: ServiceAccount
name: default
namespace: gateway-conformance-app-backend
roleRef:
kind: Role
name: inference-model-reader
apiGroup: rbac.authorization.k8s.io
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ var HTTPRouteInvalidInferencePoolRef = suite.ConformanceTest{
appBackendNamespace = "gateway-conformance-app-backend"
infraNamespace = "gateway-conformance-infra"
routeName = "httproute-to-non-existent-pool"
gatewayName = "conformance-gateway"
gatewayName = "conformance-primary-gateway"
)
routeNN := types.NamespacedName{Name: routeName, Namespace: appBackendNamespace}
gatewayNN := types.NamespacedName{Name: gatewayName, Namespace: infraNamespace}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,14 @@ spec:
parentRefs:
- group: gateway.networking.k8s.io
kind: Gateway
name: conformance-gateway # Name of the shared Gateway from base manifests
name: conformance-primary-gateway # Name of the shared Gateway from base manifests
namespace: gateway-conformance-infra # Namespace of the shared Gateway
sectionName: http
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
kind: InferencePool
name: non-existent-inference-pool # Intentionally Non-Existing
port: 8080
matches:
- path:
type: PathPrefix
Expand Down
2 changes: 1 addition & 1 deletion conformance/tests/basic/inferencepool_accepted.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ var InferencePoolAccepted = suite.ConformanceTest{
},
Test: func(t *testing.T, s *suite.ConformanceTestSuite) {
// created by the associated manifest file.
poolNN := types.NamespacedName{Name: "inferencepool-basic-accepted", Namespace: "gateway-conformance-app-backend"}
poolNN := types.NamespacedName{Name: "primary-inference-pool", Namespace: "gateway-conformance-app-backend"}

t.Run("InferencePool should have Accepted condition set to True", func(t *testing.T) {
// Define the expected status condition. We use the standard "Accepted"
Expand Down
Loading