Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion config/charts/inferencepool/templates/inferencepool.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
{{ if eq .Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2"}}
{{- /*
InferencePool manifest for the legacy v1alpha2 API.
Rendered only when the enclosing `if` matches
.Values.inferencePool.apiVersion == "inference.networking.x-k8s.io/v1alpha2".
String scalars are quoted so that values such as an all-digit release name are
not re-typed by the YAML parser after rendering.
*/ -}}
apiVersion: {{ .Values.inferencePool.apiVersion | quote }}
kind: InferencePool
metadata:
  name: {{ .Release.Name | quote }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
spec:
  {{- /* v1alpha2 exposes a single target port; falls back to 8000 when unset. */}}
  targetPortNumber: {{ .Values.inferencePool.targetPortNumber | default 8000 }}
  selector:
    {{- /* Emit one quoted label per matchLabels entry; nothing is emitted when the map is empty. */}}
    {{- if .Values.inferencePool.modelServers.matchLabels }}
    {{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }}
    {{ $key }}: {{ quote $value }}
    {{- end }}
    {{- end }}
  extensionRef:
    name: {{ include "gateway-api-inference-extension.name" . | quote }}
    portNumber: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
    failureMode: {{ .Values.inferenceExtension.failureMode | default "FailClose" | quote }}
{{ else }}
{{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }}
apiVersion: inference.networking.k8s.io/v1
apiVersion: "inference.networking.k8s.io/v1"
kind: InferencePool
metadata:
name: {{ .Release.Name }}
Expand All @@ -22,5 +43,6 @@ spec:
name: {{ include "gateway-api-inference-extension.name" . }}
port:
number: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
{{- end }}


5 changes: 5 additions & 0 deletions config/charts/inferencepool/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,15 @@ inferencePool:
targetPorts:
- number: 8000
modelServerType: vllm # vllm, triton-tensorrt-llm
apiVersion: inference.networking.k8s.io/v1
# modelServers: # REQUIRED
# matchLabels:
# app: vllm-llama3-8b-instruct

# Should only be used if apiVersion is inference.networking.x-k8s.io/v1alpha2.
# This will be deprecated once upstream gateway providers support v1; it is kept deliberately simple for now.
targetPortNumber: 8000

provider:
name: none

Expand Down