Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions api/v1/inferencepool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ type Port struct {

// EndpointPickerRef specifies a reference to an Endpoint Picker extension and its
// associated configuration.
// +kubebuilder:validation:XValidation:rule="self.kind != 'Service' || has(self.port)",message="port is required when kind is 'Service' or unspecified (defaults to 'Service')"
type EndpointPickerRef struct {
// Group is the group of the referent API object. When unspecified, the default value
// is "", representing the Core API group.
Expand Down Expand Up @@ -125,13 +126,15 @@ type EndpointPickerRef struct {
// +required
Name ObjectName `json:"name,omitempty"`

// PortNumber is the port number of the Endpoint Picker extension service. When unspecified,
// implementations SHOULD infer a default value of 9002 when the kind field is "Service" or
// unspecified (defaults to "Service").
// Port is the port of the Endpoint Picker extension service.
//
// Port is required when the referent is a Kubernetes Service. In this
// case, the port number is the service port number, not the target port.
// For other resources, destination port might be derived from the referent
// resource or this field.
//
// +optional
//nolint:kubeapilinter // ignore kubeapilinter here as we want to use a pointer: by convention zero means all ports, so we don't want to use 0 to indicate "not set".
PortNumber *PortNumber `json:"portNumber,omitempty"`
Port *Port `json:"port,omitempty"`

// FailureMode configures how the parent handles the case when the Endpoint Picker extension
// is non-responsive. When unspecified, defaults to "FailClose".
Expand Down
6 changes: 3 additions & 3 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions apix/v1alpha2/inferencepool_conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ func convertExtensionRefToV1(src *Extension) (v1.EndpointPickerRef, error) {
}
endpointPickerRef.Name = v1.ObjectName(src.Name)
if src.PortNumber != nil {
endpointPickerRef.PortNumber = ptr.To(v1.PortNumber(*src.PortNumber))
endpointPickerRef.Port = ptr.To(v1.Port{Number: v1.PortNumber(*src.PortNumber)})
}
if src.FailureMode != nil {
endpointPickerRef.FailureMode = v1.EndpointPickerFailureMode(*src.FailureMode)
Expand All @@ -275,8 +275,8 @@ func convertEndpointPickerRefFromV1(src *v1.EndpointPickerRef) (Extension, error
extension.Kind = ptr.To(Kind(src.Kind))
}
extension.Name = ObjectName(src.Name)
if src.PortNumber != nil {
extension.PortNumber = ptr.To(PortNumber(*src.PortNumber))
if src.Port != nil {
extension.PortNumber = ptr.To(PortNumber(src.Port.Number))
}
if src.FailureMode != "" {
extension.FailureMode = ptr.To(ExtensionFailureMode(src.FailureMode))
Expand Down
7 changes: 4 additions & 3 deletions apix/v1alpha2/inferencepool_conversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/google/go-cmp/cmp"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
)

Expand All @@ -34,7 +35,7 @@ var (
v1Group = v1.Group("my-group")
v1Kind = v1.Kind("MyKind")
v1FailureMode = v1.EndpointPickerFailureMode("Deny")
v1PortNumber = v1.PortNumber(9000)
v1Port = v1.Port{Number: 9000}
)

func TestInferencePoolConvertTo(t *testing.T) {
Expand Down Expand Up @@ -110,7 +111,7 @@ func TestInferencePoolConvertTo(t *testing.T) {
Group: &v1Group,
Kind: v1Kind,
Name: "my-epp-service",
PortNumber: &v1PortNumber,
Port: &v1Port,
FailureMode: v1FailureMode,
},
},
Expand Down Expand Up @@ -433,7 +434,7 @@ func TestInferencePoolConvertFrom(t *testing.T) {
Group: &v1Group,
Kind: v1Kind,
Name: "my-epp-service",
PortNumber: &v1PortNumber,
Port: &v1Port,
FailureMode: v1FailureMode,
},
},
Expand Down
10 changes: 5 additions & 5 deletions client-go/applyconfiguration/api/v1/endpointpickerref.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 23 additions & 8 deletions config/crd/bases/inference.networking.k8s.io_inferencepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,33 @@ spec:
maxLength: 253
minLength: 1
type: string
portNumber:
port:
description: |-
PortNumber is the port number of the Endpoint Picker extension service. When unspecified,
implementations SHOULD infer a default value of 9002 when the kind field is "Service" or
unspecified (defaults to "Service").
format: int32
maximum: 65535
minimum: 1
type: integer
Port is the port of the Endpoint Picker extension service.
Port is required when the referent is a Kubernetes Service. In this
case, the port number is the service port number, not the target port.
For other resources, destination port might be derived from the referent
resource or this field.
properties:
number:
description: |-
Number defines the port number to access the selected model server Pods.
The number must be in the range 1 to 65535.
format: int32
maximum: 65535
minimum: 1
type: integer
required:
- number
type: object
required:
- name
type: object
x-kubernetes-validations:
- message: port is required when kind is 'Service' or unspecified
(defaults to 'Service')
rule: self.kind != 'Service' || has(self.port)
selector:
description: |-
Selector determines which Pods are members of this inference pool.
Expand Down
3 changes: 3 additions & 0 deletions config/manifests/inferencepool-resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ spec:
app: vllm-llama3-8b-instruct
endpointPickerRef:
name: vllm-llama3-8b-instruct-epp
kind: Service
port:
number: 9002
---
apiVersion: v1
kind: Service
Expand Down
3 changes: 3 additions & 0 deletions conformance/tests/inferencepool_invalid_epp_service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ spec:
- number: 3000
endpointPickerRef:
name: non-existent-epp-svc
kind: Service
port:
number: 9002
---
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
Expand Down
4 changes: 2 additions & 2 deletions site-src/reference/spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ _Appears in:_
| `group` _[Group](#group)_ | Group is the group of the referent API object. When unspecified, the default value<br />is "", representing the Core API group. | | MaxLength: 253 <br />MinLength: 0 <br />Pattern: `^$\|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$` <br /> |
| `kind` _[Kind](#kind)_ | Kind is the Kubernetes resource kind of the referent.<br />Required if the referent is ambiguous, e.g. service with multiple ports.<br />Defaults to "Service" when not specified.<br />ExternalName services can refer to CNAME DNS records that may live<br />outside of the cluster and as such are difficult to reason about in<br />terms of conformance. They also may not be safe to forward to (see<br />CVE-2021-25740 for more information). Implementations MUST NOT<br />support ExternalName Services. | Service | MaxLength: 63 <br />MinLength: 1 <br />Pattern: `^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$` <br /> |
| `name` _[ObjectName](#objectname)_ | Name is the name of the referent API object. | | MaxLength: 253 <br />MinLength: 1 <br /> |
| `portNumber` _[PortNumber](#portnumber)_ | PortNumber is the port number of the Endpoint Picker extension service. When unspecified,<br />implementations SHOULD infer a default value of 9002 when the kind field is "Service" or<br />unspecified (defaults to "Service"). | | Maximum: 65535 <br />Minimum: 1 <br /> |
| `port` _[Port](#port)_ | Port is the port of the Endpoint Picker extension service.<br />Port is required when the referent is a Kubernetes Service. In this<br />case, the port number is the service port number, not the target port.<br />For other resources, destination port might be derived from the referent<br />resource or this field. | | |
| `failureMode` _[EndpointPickerFailureMode](#endpointpickerfailuremode)_ | FailureMode configures how the parent handles the case when the Endpoint Picker extension<br />is non-responsive. When unspecified, defaults to "FailClose". | FailClose | Enum: [FailOpen FailClose] <br /> |


Expand Down Expand Up @@ -340,6 +340,7 @@ Port defines the network port that will be exposed by this InferencePool.


_Appears in:_
- [EndpointPickerRef](#endpointpickerref)
- [InferencePoolSpec](#inferencepoolspec)

| Field | Description | Default | Validation |
Expand All @@ -358,7 +359,6 @@ _Validation:_
- Minimum: 1

_Appears in:_
- [EndpointPickerRef](#endpointpickerref)
- [Port](#port)


Expand Down
114 changes: 114 additions & 0 deletions test/cel/inferencepool_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"context"
"fmt"
"testing"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
)

// TestValidateInferencePool creates InferencePool resources through k8sClient
// and checks that creation succeeds or fails with the expected validation
// messages. Each case starts from a deep copy of a known-valid pool and applies
// a single mutation to it.
func TestValidateInferencePool(t *testing.T) {
	ctx := context.Background()

	// baseInferencePool is a valid InferencePool resource that every test case
	// copies before mutating.
	baseInferencePool := v1.InferencePool{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "base-pool",
			Namespace: metav1.NamespaceDefault,
		},
		Spec: v1.InferencePoolSpec{
			TargetPorts: []v1.Port{
				{Number: 8000},
			},
			Selector: v1.LabelSelector{
				MatchLabels: map[v1.LabelKey]v1.LabelValue{
					"app": "model-server",
				},
			},
			EndpointPickerRef: v1.EndpointPickerRef{
				Name: "epp",
				Kind: "Service",
				Port: ptrTo(v1.Port{Number: 9002}),
			},
		},
	}

	testCases := []struct {
		desc       string
		mutate     func(ip *v1.InferencePool)
		wantErrors []string
	}{
		{
			desc: "passes validation with a valid configuration",
			mutate: func(ip *v1.InferencePool) {
			},
			wantErrors: nil,
		},
		{
			desc: "fails validation when kind is unset (defaults to Service) and port is missing",
			mutate: func(ip *v1.InferencePool) {
				// By setting Kind to an empty string, we rely on the API server's default value of "Service".
				ip.Spec.EndpointPickerRef.Kind = ""
				ip.Spec.EndpointPickerRef.Port = nil
			},
			wantErrors: []string{"port is required when kind is 'Service' or unspecified (defaults to 'Service')"},
		},
		{
			desc: "fails validation when kind is explicitly 'Service' and port is missing",
			mutate: func(ip *v1.InferencePool) {
				ip.Spec.EndpointPickerRef.Kind = "Service"
				ip.Spec.EndpointPickerRef.Port = nil
			},
			wantErrors: []string{"port is required when kind is 'Service' or unspecified (defaults to 'Service')"},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			ip := baseInferencePool.DeepCopy()
			// Use a unique name for each test case to avoid conflicts.
			ip.Name = fmt.Sprintf("test-pool-%v", time.Now().UnixNano())

			if tc.mutate != nil {
				tc.mutate(ip)
			}
			err := k8sClient.Create(ctx, ip)

			// An error must be returned if and only if the case expects one.
			// The expectation is computed once so the guard and the failure
			// message cannot disagree (e.g. for an empty, non-nil wantErrors).
			wantErr := len(tc.wantErrors) != 0
			if wantErr != (err != nil) {
				t.Fatalf("Unexpected response while creating InferencePool; got err=\n%v\n; want error=%v", err, wantErr)
			}

			// Past the guard, err is non-nil whenever wantErrors is non-empty,
			// so calling err.Error() inside the loop is safe.
			var missingErrorStrings []string
			for _, wantError := range tc.wantErrors {
				if !celErrorStringMatches(err.Error(), wantError) {
					missingErrorStrings = append(missingErrorStrings, wantError)
				}
			}
			if len(missingErrorStrings) != 0 {
				t.Errorf("Unexpected response while creating InferencePool; got err=\n%v\n; missing strings within error=%q", err, missingErrorStrings)
			}
		})
	}
}
Loading