5 changes: 4 additions & 1 deletion Makefile
@@ -316,11 +316,14 @@ helm: manifests kustomize helmify

.PHONY: helm-install
helm-install: helm
helm upgrade --install llmaz ./chart -f ./chart/values.global.yaml
helm upgrade --install llmaz ./chart -f ./chart/values.global.yaml --dependency-update

.PHONY: helm-upgrade
helm-upgrade: image-push artifacts helm-install

.PHONY: install-chatbot
install-chatbot: helm-install

.PHONY: helm-package
helm-package: helm
# Make sure it will always start with a new line.
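
The added `--dependency-update` flag tells Helm to fetch missing chart dependencies (here, open-webui) before installing. A rough manual equivalent, assuming the chart lives at `./chart`:

```cmd
# Pull declared dependencies into chart/charts/ and refresh chart/Chart.lock,
# then install or upgrade the release with the global values.
helm dependency update ./chart
helm upgrade --install llmaz ./chart -f ./chart/values.global.yaml
```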
1 change: 1 addition & 0 deletions README.md
@@ -38,6 +38,7 @@ Easy, advanced inference platform for large language models on Kubernetes
- **Various Model Providers**: llmaz supports a wide range of model providers, such as [HuggingFace](https://huggingface.co/), [ModelScope](https://www.modelscope.cn), ObjectStores. llmaz will automatically handle the model loading, requiring no effort from users.
- **Multi-Host Support**: llmaz supports both single-host and multi-host scenarios with [LWS](https://github.com/kubernetes-sigs/lws) from day 0.
- **Scaling Efficiency**: llmaz supports horizontal scaling with [HPA](./docs/examples/hpa/README.md) by default and will integrate with autoscaling components like [Cluster-Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) or [Karpenter](https://github.com/kubernetes-sigs/karpenter) for smart scaling across different clouds.
- **Built-in ChatUI**: Out-of-the-box chatbot support through integration with [Open WebUI](https://github.com/open-webui/open-webui); see the configuration [here](./docs/open-webui.md).

## Quick Start

6 changes: 6 additions & 0 deletions chart/Chart.lock
@@ -0,0 +1,6 @@
dependencies:
- name: open-webui
repository: https://helm.openwebui.com/
version: 6.4.0
digest: sha256:2520f6e26f2e6fd3e51c5f7f940eef94217c125a9828b0f59decedbecddcdb29
generated: "2025-04-21T00:50:06.532039+08:00"
6 changes: 6 additions & 0 deletions chart/Chart.yaml
@@ -19,3 +19,9 @@ version: 0.0.8
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: 0.1.2

dependencies:
- name: open-webui
version: "6.4.0"
repository: "https://helm.openwebui.com/"
condition: open-webui.enabled
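
The `condition` field gates the subchart on a values flag, so nothing from open-webui is rendered unless that flag is true. A minimal override sketch, with the key spelled exactly as in `values.global.yaml`:

```yaml
# Flipping this on makes Helm render and install the open-webui subchart.
open-webui:
  enabled: true
```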
2 changes: 1 addition & 1 deletion chart/templates/backends/llamacpp.yaml
@@ -1,4 +1,4 @@
{{- if .Values.backendRuntime.enable -}}
{{- if .Values.backendRuntime.enabled -}}
apiVersion: inference.llmaz.io/v1alpha1
kind: BackendRuntime
metadata:
2 changes: 1 addition & 1 deletion chart/templates/backends/ollama.yaml
@@ -1,4 +1,4 @@
{{- if .Values.backendRuntime.enable -}}
{{- if .Values.backendRuntime.enabled -}}
apiVersion: inference.llmaz.io/v1alpha1
kind: BackendRuntime
metadata:
2 changes: 1 addition & 1 deletion chart/templates/backends/sglang.yaml
@@ -1,4 +1,4 @@
{{- if .Values.backendRuntime.enable -}}
{{- if .Values.backendRuntime.enabled -}}
apiVersion: inference.llmaz.io/v1alpha1
kind: BackendRuntime
metadata:
2 changes: 1 addition & 1 deletion chart/templates/backends/tgi.yaml
@@ -1,4 +1,4 @@
{{- if .Values.backendRuntime.enable -}}
{{- if .Values.backendRuntime.enabled -}}
apiVersion: inference.llmaz.io/v1alpha1
kind: BackendRuntime
metadata:
2 changes: 1 addition & 1 deletion chart/templates/backends/vllm.yaml
@@ -1,4 +1,4 @@
{{- if .Values.backendRuntime.enable -}}
{{- if .Values.backendRuntime.enabled -}}
apiVersion: inference.llmaz.io/v1alpha1
kind: BackendRuntime
metadata:
2 changes: 1 addition & 1 deletion chart/templates/lws/leaderworkerset.yaml
@@ -1,4 +1,4 @@
{{- if .Values.leaderWorkerSet.enable -}}
{{- if .Values.leaderWorkerSet.enabled -}}
apiVersion: v1
kind: Namespace
metadata:
2 changes: 1 addition & 1 deletion chart/templates/prometheus/prometheus.yaml
@@ -1,4 +1,4 @@
{{- if .Values.prometheus.enable }}
{{- if .Values.prometheus.enabled }}
{{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
{{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }}
{{- end }}
2 changes: 1 addition & 1 deletion chart/templates/prometheus/service-monitor.yaml
@@ -1,4 +1,4 @@
{{- if .Values.prometheus.enable }}
{{- if .Values.prometheus.enabled }}
{{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
{{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }}
{{- end }}
2 changes: 1 addition & 1 deletion chart/templates/prometheus/serviceaccount.yaml
@@ -1,4 +1,4 @@
{{- if .Values.prometheus.enable }}
{{- if .Values.prometheus.enabled }}
{{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
{{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }}
{{- end }}
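
The template edits above amount to one coordinated rename of the values key `enable` to `enabled` across `backendRuntime`, `leaderWorkerSet`, and `prometheus`. A quick sanity check that no template still reads the old key, assuming it runs from the repo root after `helm dependency update ./chart`:

```cmd
# Should print nothing once every guard uses .enabled.
grep -rn '\.enable\b' chart/templates/

# Render the chart to confirm every guard still evaluates; --api-versions
# satisfies the ServiceMonitor capability check during offline rendering.
helm template llmaz ./chart -f ./chart/values.global.yaml \
  --api-versions monitoring.coreos.com/v1/ServiceMonitor > /dev/null && echo "renders OK"
```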
24 changes: 21 additions & 3 deletions chart/values.global.yaml
@@ -1,7 +1,7 @@
fullnameOverride: "llmaz"

backendRuntime:
enable: true
enabled: true
llamacpp:
image:
repository: ghcr.io/ggerganov/llama.cpp
@@ -24,8 +24,26 @@ backendRuntime:
tag: v0.7.3

leaderWorkerSet:
enable: true
enabled: true

prometheus:
# Prometheus is required to enable smart routing.
enable: true
enabled: true

open-webui:
enabled: false
persistence:
enabled: false
enableOpenaiApi: true
openaiBaseApiUrl: "https://api.openai.com/v1"
extraEnvVars:
- name: OPENAI_API_KEY
value: "ChangeMe"
ollama:
enabled: false
pipelines:
enabled: false
tika:
enabled: false
redis-cluster:
enabled: false
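
Committing a real `OPENAI_API_KEY` in values is easy to leak. Since `extraEnvVars` entries become container environment variables, the key can instead be read from a Secret; a sketch, assuming the open-webui chart passes these entries through verbatim and that a Secret named `openai-credentials` already exists (both assumptions):

```yaml
open-webui:
  extraEnvVars:
    - name: OPENAI_API_KEY
      valueFrom:
        secretKeyRef:
          name: openai-credentials  # hypothetical Secret created beforehand
          key: api-key              # hypothetical key inside that Secret
```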
1 change: 0 additions & 1 deletion docs/examples/llamacpp/playground.yaml
@@ -8,6 +8,5 @@ spec:
modelName: qwen2-0--5b-gguf
backendRuntimeConfig:
backendName: llamacpp
configName: default
args:
- -fa # use flash attention
47 changes: 47 additions & 0 deletions docs/open-webui.md
@@ -0,0 +1,47 @@
# Open WebUI

[Open WebUI](https://github.com/open-webui/open-webui) is a user-friendly AI interface with OpenAI-compatible APIs, serving as the default chatbot for llmaz.

## Prerequisites

- Make sure you're working in the **llmaz-system** namespace; other namespaces haven't been tested.
- Make sure [Envoy Gateway](https://github.com/envoyproxy/gateway) and [Envoy AI Gateway](https://github.com/envoyproxy/ai-gateway) are installed; both are installed by default with llmaz.

## How to use

1. Enable Open WebUI in the `values.global.yaml` file (it is disabled by default):

```yaml
open-webui:
enabled: true
```

> Optionally set `persistence.enabled: true` to persist the data; this is recommended for production.
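
A minimal sketch of that override, using the same keys as `values.global.yaml`:

```yaml
open-webui:
  enabled: true
  persistence:
    enabled: true # persist chat data across restarts
```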

2. Run `kubectl get svc -n envoy-gateway-system` to list the services; the output looks like:

```cmd
envoy-default-default-envoy-ai-gateway-dbec795a LoadBalancer 10.96.145.150 <pending> 80:30548/TCP 132m
envoy-gateway ClusterIP 10.96.52.76 <none> 18000/TCP,18001/TCP,18002/TCP,19001/TCP 172m
```
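
The service name ends in a generated suffix (`dbec795a` above), so it differs per cluster. One way to grab yours for the next step, assuming a single AI-gateway service:

```cmd
kubectl get svc -n envoy-gateway-system -o name | grep envoy-ai-gateway
```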

3. Set `openaiBaseApiUrl` in `values.global.yaml` to the in-cluster address of the AI gateway service, like:

```yaml
open-webui:
enabled: true
openaiBaseApiUrl: http://envoy-default-default-envoy-ai-gateway-dbec795a.envoy-gateway-system.svc.cluster.local/v1
```

4. Run `make install-chatbot` to install the chatbot.
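
As the Makefile change above shows, `install-chatbot` is an alias for `helm-install`, which boils down to:

```cmd
helm upgrade --install llmaz ./chart -f ./chart/values.global.yaml --dependency-update
```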

5. Port-forward the Open WebUI service:

```cmd
kubectl port-forward svc/open-webui 8080:80
```

6. Visit [http://localhost:8080](http://localhost:8080) to access the Open WebUI.

7. Create the administrator account when you first sign in.

**That's it! You can now chat with your llmaz models through Open WebUI.**
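
If the UI comes up but no models are listed, probing the gateway's OpenAI-compatible endpoint directly can help isolate the problem. A sketch, reusing the service from step 2 and assuming the gateway serves the standard `/v1/models` route:

```cmd
kubectl port-forward -n envoy-gateway-system svc/envoy-default-default-envoy-ai-gateway-dbec795a 9090:80
curl http://localhost:9090/v1/models
```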