Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
1e127ac
TargetPortNumber int32 to become TargetPorts []Port
capri-xiyue Aug 11, 2025
3f7d448
partially fix ut
capri-xiyue Aug 11, 2025
90a7e97
change inferencepool ut temporarily
capri-xiyue Aug 11, 2025
922833c
change comments
capri-xiyue Aug 11, 2025
55bfa1b
changed yaml
capri-xiyue Aug 11, 2025
67eefa2
added temporary fix
capri-xiyue Aug 11, 2025
7524401
Update api/v1/inferencepool_types.go
capri-xiyue Aug 11, 2025
cfb2526
fixed ut and e2e to adopt naming change
capri-xiyue Aug 11, 2025
5c3cdcd
added listtype tag
capri-xiyue Aug 11, 2025
4262c0e
revert back:
capri-xiyue Aug 12, 2025
16b2c43
updated to include list map
capri-xiyue Aug 12, 2025
9f85e4f
Update site-src/guides/inferencepool-rollout.md
capri-xiyue Aug 12, 2025
11fc5b3
rebase
capri-xiyue Aug 12, 2025
02212a4
partially fix conflicts
capri-xiyue Aug 13, 2025
ca97398
rebase with conversion change
capri-xiyue Aug 13, 2025
93ab298
updated generate
capri-xiyue Aug 13, 2025
14a74b6
added ut
capri-xiyue Aug 13, 2025
1f9ff74
rebase main
capri-xiyue Aug 14, 2025
753af72
fixed merge error
capri-xiyue Aug 14, 2025
cce86b8
updated helm chart
capri-xiyue Aug 14, 2025
119d445
updated message
capri-xiyue Aug 14, 2025
e0c297c
updated to use alias
capri-xiyue Aug 14, 2025
4ccccfe
fixed ut
capri-xiyue Aug 14, 2025
182f4ee
merge conflicts
capri-xiyue Aug 14, 2025
357b96f
merge conflicts
capri-xiyue Aug 14, 2025
2d4a599
fixed kubectl cli linter
capri-xiyue Aug 14, 2025
df8681c
make it back to pointer
capri-xiyue Aug 14, 2025
4678971
fixed kal linter
capri-xiyue Aug 14, 2025
f93f723
fixed conversion
capri-xiyue Aug 14, 2025
369ce0a
revert v1a2 to use pointer
capri-xiyue Aug 14, 2025
4207d68
change extension to be non-nil pointer
capri-xiyue Aug 14, 2025
f76c994
make v1a2 the extension
capri-xiyue Aug 14, 2025
ba5ceaa
added godocs
capri-xiyue Aug 14, 2025
aa8b10e
updated comments
capri-xiyue Aug 14, 2025
49d2c43
updated comments
capri-xiyue Aug 14, 2025
0896625
updated conversion
capri-xiyue Aug 15, 2025
d8a7dff
added nil check
capri-xiyue Aug 15, 2025
3640128
added nil check
capri-xiyue Aug 15, 2025
c352bae
fixed linter
capri-xiyue Aug 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions api/v1/inferencepool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,29 @@ type InferencePoolSpec struct {
// +required
Selector LabelSelector `json:"selector,omitempty,omitzero"`

// TargetPortNumber defines the port number to access the selected model server Pods.
// The number must be in the range 1 to 65535.
//
// +kubebuilder:validation:Minimum=1
// +kubebuilder:validation:Maximum=65535
// TargetPorts defines a list of ports that are exposed by this InferencePool.
// Currently, the list may only include a single port definition.
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=1
// +listType=map
// +listMapKey=number
// +required
TargetPortNumber int32 `json:"targetPortNumber,omitempty"`
TargetPorts []Port `json:"targetPorts,omitempty"`

// Extension configures an endpoint picker as an extension service.
// +optional
// +required
ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"`
}

// Port defines the network port that will be exposed by this InferencePool.
// It is the element type of InferencePoolSpec.TargetPorts, which is declared
// with +listType=map and +listMapKey=number, so Number is the key that
// identifies an entry in that list.
type Port struct {
// NOTE(review): no Minimum/Maximum validation markers are attached to this
// field; the 1-65535 range is presumably enforced on the PortNumber type,
// which is declared elsewhere — confirm.

// Number defines the port number to access the selected model server Pods.
// The number must be in the range 1 to 65535.
// It is serialized under the JSON key "number".
//
// +required
Number PortNumber `json:"number,omitempty"`
}

// Extension specifies how to configure an extension that runs the endpoint picker.
type Extension struct {
// Group is the group of the referent.
Expand Down
20 changes: 20 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 12 additions & 12 deletions apix/v1alpha2/inferencepool_conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error {
if dst == nil {
return errors.New("dst cannot be nil")
}
v1Extension, err := convertExtensionRefToV1(src.Spec.ExtensionRef)
v1Extension, err := convertExtensionRefToV1(&src.Spec.ExtensionRef)
if err != nil {
return err
}
Expand All @@ -41,7 +41,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error {
}
dst.TypeMeta = src.TypeMeta
dst.ObjectMeta = src.ObjectMeta
dst.Spec.TargetPortNumber = src.Spec.TargetPortNumber
dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(src.Spec.TargetPortNumber)}}
dst.Spec.ExtensionRef = v1Extension
dst.Status = *v1Status
if src.Spec.Selector != nil {
Expand All @@ -68,7 +68,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error {
}
dst.TypeMeta = src.TypeMeta
dst.ObjectMeta = src.ObjectMeta
dst.Spec.TargetPortNumber = src.Spec.TargetPortNumber
dst.Spec.TargetPortNumber = int32(src.Spec.TargetPorts[0].Number)
dst.Spec.ExtensionRef = extensionRef
dst.Status = *status
if src.Spec.Selector.MatchLabels != nil {
Expand All @@ -82,7 +82,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error {

func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error) {
if src == nil {
return nil, nil
return nil, errors.New("src cannot be nil")
}
u, err := toUnstructured(src)
if err != nil {
Expand All @@ -93,7 +93,7 @@ func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error

func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, error) {
if src == nil {
return nil, nil
return nil, errors.New("src cannot be nil")
}
u, err := toUnstructured(src)
if err != nil {
Expand All @@ -104,7 +104,7 @@ func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, err

func convertExtensionRefToV1(src *Extension) (v1.Extension, error) {
if src == nil {
return v1.Extension{}, nil
return v1.Extension{}, errors.New("src cannot be nil")
}
u, err := toUnstructured(src)
if err != nil {
Expand All @@ -117,19 +117,19 @@ func convertExtensionRefToV1(src *Extension) (v1.Extension, error) {
return *out, nil
}

func convertExtensionRefFromV1(src *v1.Extension) (*Extension, error) {
func convertExtensionRefFromV1(src *v1.Extension) (Extension, error) {
if src == nil {
return nil, nil
return Extension{}, errors.New("src cannot be nil")
}
u, err := toUnstructured(src)
u, err := toUnstructured(&src)
if err != nil {
return nil, err
return Extension{}, err
}
extension, err := convert[Extension](u)
if err != nil {
return nil, err
return Extension{}, err
}
return extension, nil
return *extension, nil
}

func toUnstructured(obj any) (*unstructured.Unstructured, error) {
Expand Down
17 changes: 8 additions & 9 deletions apix/v1alpha2/inferencepool_conversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func TestInferencePoolConvertTo(t *testing.T) {
"app": "my-model-server",
},
TargetPortNumber: 8080,
ExtensionRef: &Extension{
ExtensionRef: Extension{
Group: &group,
Kind: &kind,
Name: "my-epp-service",
Expand Down Expand Up @@ -99,7 +99,7 @@ func TestInferencePoolConvertTo(t *testing.T) {
"app": "my-model-server",
},
},
TargetPortNumber: 8080,
TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}},
ExtensionRef: v1.Extension{
Group: &v1Group,
Kind: v1Kind,
Expand Down Expand Up @@ -127,7 +127,7 @@ func TestInferencePoolConvertTo(t *testing.T) {
wantErr: false,
},
{
name: "conversion from v1alpha2 to v1 with nil extensionRef",
name: "conversion from v1alpha2 to v1 with empty extensionRef",
src: &InferencePool{
TypeMeta: metav1.TypeMeta{
Kind: "InferencePool",
Expand Down Expand Up @@ -174,7 +174,7 @@ func TestInferencePoolConvertTo(t *testing.T) {
"app": "my-model-server",
},
},
TargetPortNumber: 8080,
TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}},
},
Status: v1.InferencePoolStatus{
Parents: []v1.PoolStatus{
Expand Down Expand Up @@ -234,7 +234,7 @@ func TestInferencePoolConvertFrom(t *testing.T) {
"app": "my-model-server",
},
},
TargetPortNumber: 8080,
TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}},
ExtensionRef: v1.Extension{
Group: &v1Group,
Kind: v1Kind,
Expand Down Expand Up @@ -273,7 +273,7 @@ func TestInferencePoolConvertFrom(t *testing.T) {
"app": "my-model-server",
},
TargetPortNumber: 8080,
ExtensionRef: &Extension{
ExtensionRef: Extension{
Group: &group,
Kind: &kind,
Name: "my-epp-service",
Expand All @@ -300,7 +300,7 @@ func TestInferencePoolConvertFrom(t *testing.T) {
wantErr: false,
},
{
name: "conversion from v1 to v1alpha2 with nil extensionRef",
name: "conversion from v1 to v1alpha2 with empty extensionRef",
src: &v1.InferencePool{
TypeMeta: metav1.TypeMeta{
Kind: "InferencePool",
Expand All @@ -316,7 +316,7 @@ func TestInferencePoolConvertFrom(t *testing.T) {
"app": "my-model-server",
},
},
TargetPortNumber: 8080,
TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}},
},
Status: v1.InferencePoolStatus{
Parents: []v1.PoolStatus{
Expand Down Expand Up @@ -348,7 +348,6 @@ func TestInferencePoolConvertFrom(t *testing.T) {
"app": "my-model-server",
},
TargetPortNumber: 8080,
ExtensionRef: &Extension{},
},
Status: InferencePoolStatus{
Parents: []PoolStatus{
Expand Down
3 changes: 2 additions & 1 deletion apix/v1alpha2/inferencepool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ type InferencePoolSpec struct {
TargetPortNumber int32 `json:"targetPortNumber"`

// Extension configures an endpoint picker as an extension service.
ExtensionRef *Extension `json:"extensionRef,omitempty"`
// +required
ExtensionRef Extension `json:"extensionRef,omitempty"`
}

// Extension specifies how to configure an extension that runs the endpoint picker.
Expand Down
6 changes: 1 addition & 5 deletions apix/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 13 additions & 8 deletions client-go/applyconfiguration/api/v1/inferencepoolspec.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions client-go/applyconfiguration/api/v1/port.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions client-go/applyconfiguration/utils.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion cmd/epp/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ var (
"The configuration specified as text, in lieu of a file")

modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+
"Default value will be set to InferencePool.Spec.TargetPortNumber if not set.")
"Default value will be set to the InferencePool.Spec.TargetPorts[0].Number if not set.")
modelServerMetricsPath = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods")
modelServerMetricsScheme = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods")
modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)")
Expand Down
5 changes: 4 additions & 1 deletion config/charts/inferencepool/templates/inferencepool.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ metadata:
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
spec:
targetPortNumber: {{ .Values.inferencePool.targetPortNumber }}
targetPorts:
{{- range .Values.inferencePool.targetPorts }}
- number: {{ .number }}
{{- end }}
selector:
matchLabels:
{{- if .Values.inferencePool.modelServers.matchLabels }}
Expand Down
9 changes: 5 additions & 4 deletions config/charts/inferencepool/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ inferenceExtension:
enableLeaderElection: false

inferencePool:
targetPortNumber: 8000
targetPorts:
- number: 8000
modelServerType: vllm # vllm, triton-tensorrt-llm
# modelServers: # REQUIRED
# matchLabels:
# app: vllm-llama3-8b-instruct
modelServers: # REQUIRED
matchLabels:
app: vllm-llama3-8b-instruct

provider:
name: none
Expand Down
Loading