Skip to content

Commit 4ebab20

Browse files
committed
updating helm charts to work on the cluster with v1 and v1a2
1 parent b74259d commit 4ebab20

File tree

2 files changed

+27
-8
lines changed

2 files changed

+27
-8
lines changed
Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }}
1+
{{ if eq .Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2"}}
22
apiVersion: {{ .Values.inferencePool.apiVersion }}
33
kind: InferencePool
44
metadata:
@@ -7,17 +7,31 @@ metadata:
77
labels:
88
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
99
spec:
10-
{{ if eq .Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2"}}
11-
targetPortNumber:
12-
{{- range .Values.inferencePool.targetPorts }}
13-
- number: {{ .number }}
10+
targetPortNumber: {{ .Values.inferencePool.targetPortNumber | default 8000 }}
11+
selector:
12+
{{- if .Values.inferencePool.modelServers.matchLabels }}
13+
{{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }}
14+
{{ $key }}: {{ quote $value }}
1415
{{- end }}
15-
{{ else }}
16+
{{- end }}
17+
extensionRef:
18+
name: {{ include "gateway-api-inference-extension.name" . }}
19+
portNumber: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
20+
failureMode: {{ .Values.inferenceExtension.failureMode | default "FailClose" }}
21+
{{ else }}
22+
{{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }}
23+
apiVersion: "inference.networking.k8s.io/v1"
24+
kind: InferencePool
25+
metadata:
26+
name: {{ .Release.Name }}
27+
namespace: {{ .Release.Namespace }}
28+
labels:
29+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
30+
spec:
1631
targetPorts:
1732
{{- range .Values.inferencePool.targetPorts }}
1833
- number: {{ .number }}
1934
{{- end }}
20-
{{- end}}
2135
selector:
2236
matchLabels:
2337
{{- if .Values.inferencePool.modelServers.matchLabels }}
@@ -29,5 +43,6 @@ spec:
2943
name: {{ include "gateway-api-inference-extension.name" . }}
3044
port:
3145
number: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
46+
{{- end }}
3247

3348

config/charts/inferencepool/values.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,15 @@ inferencePool:
4444
targetPorts:
4545
- number: 8000
4646
modelServerType: vllm # vllm, triton-tensorrt-llm
47-
apiVersion: inference.networking.k8s.io/v1
47+
apiVersion: inference.networking.k8s.io/v1
4848
# modelServers: # REQUIRED
4949
# matchLabels:
5050
# app: vllm-llama3-8b-instruct
5151

52+
# Should only be used if apiVersion is inference.networking.x-k8s.io/v1alpha2.
53+
# This will be deprecated once upstream gateway providers support v1; it is a simple stopgap for now.
54+
targetPortNumber: 8000
55+
5256
provider:
5357
name: none
5458

0 commit comments

Comments
 (0)