✨ Helm Chart for VideoQnA Application #497

Open · wants to merge 33 commits into base `main` from `helm/videoqna`

Changes from 1 commit (of 33)

Commits:
- `131861f` Optimize path and link validity check. (#462) (ZePan110, Oct 9, 2024)
- `7cd488a` 🚨 linter and pre-commit check fixes (krish918, Oct 16, 2024)
- `1f121e9` ✅ Added helm tests for all services | linting fixes (krish918, Oct 16, 2024)
- `418edb7` ✏ typo fix (krish918, Oct 17, 2024)
- `24b4e3f` 🛂 running vdms as root to fix errors (krish918, Oct 22, 2024)
- `880e406` ✅ updated tests for vdms-vector-db | typo fix (krish918, Oct 22, 2024)
- `0110b85` ⚡Updated and fixed several tests, values during integration (krish918, Oct 24, 2024)
- `56afc63` 🔥 removed hardcoded extraArgs for some comps (krish918, Oct 24, 2024)
- `1a5f762` 🔥 removed hardcoded extraArgs for some comps (krish918, Oct 24, 2024)
- `5fbfe85` Merge branch 'main' into helm/videoqna (krish918, Oct 24, 2024)
- `3bb015d` 🩹 fixes for request going to nginx instead of videoqna (krish918, Oct 27, 2024)
- `a29691b` 🔨 passed nginx proxy endpoints to UI using templates (krish918, Oct 28, 2024)
- `e8f58bb` 🔨 passed nginx proxy endpoints to UI using templates (krish918, Oct 28, 2024)
- `a8cbdbf` 💚 added symbolic links to values files for CI (krish918, Oct 28, 2024)
- `63bf7f6` 💚 added symbolic links to values files for CI (krish918, Oct 28, 2024)
- `cf4360f` 📝 Added readme files for videoqna charts and subcharts (krish918, Oct 28, 2024)
- `2d02cf9` 💚 Added missing symbolic links for some values files (krish918, Oct 28, 2024)
- `d03a52f` Merge branch 'main' into helm/videoqna (krish918, Oct 29, 2024)
- `1e5eea1` [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Oct 29, 2024)
- `6e832ea` 💚 added variant prefix to all videoqna values file (krish918, Oct 29, 2024)
- `fc17e30` 💚 Added several CI fixes (krish918, Oct 30, 2024)
- `5ee3dc0` updated videoqna-ui vars and configmap (krish918, Oct 30, 2024)
- `ea180a5` Merge branch 'main' into helm/videoqna (krish918, Oct 30, 2024)
- `ef695c8` 🩹 Fixes in reranking values for enabling data-prep (krish918, Oct 31, 2024)
- `7be67a1` 🩹 fixes in chart.yaml | updates in UI configmap (krish918, Nov 1, 2024)
- `3cf4f29` updated videoqna-ui values filename (krish918, Nov 1, 2024)
- `0e37189` Merge branch 'main' into helm/videoqna (krish918, Nov 1, 2024)
- `d13bdcb` 🔥 Updates based on lvm-uservice from visualqna (krish918, Nov 4, 2024)
- `c932c31` Merge branch 'main' into helm/videoqna (krish918, Nov 4, 2024)
- `a993f1a` [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Nov 4, 2024)
- `773922e` 🩹 fix videoqna new dependency values for lvm-uservice (krish918, Nov 4, 2024)
- `4bd8b04` lvm-serving rename | replaced cachedir with modeldir (krish918, Nov 6, 2024)
- `91f96ec` reverted to INDEX_NAME key (krish918, Nov 6, 2024)
Merge branch 'main' into helm/videoqna
krish918 authored Nov 4, 2024
commit c932c31eb30b6ec64ee6edf9dab637e48e47f637
7 changes: 6 additions & 1 deletion helm-charts/common/lvm-uservice/Chart.yaml
@@ -3,11 +3,16 @@

apiVersion: v2
name: lvm-uservice
-description: A Helm chart for deploying LVM Microservice
+description: The Helm chart for deploying lvm as a microservice
type: application
version: 1.0.0
# The lvm microservice server version
appVersion: "v1.0"
dependencies:
- name: tgi
version: 1.0.0
repository: file://../tgi
condition: tgi.enabled
- name: lvm-serving
version: 1.0.0
repository: file://../lvm-serving
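For orientation, both backends are declared as optional dependencies. A minimal sketch of the full `dependencies` block, assuming (since the closing lines are folded away in this condensed view) that `lvm-serving` is guarded by an `lvm-serving.enabled` condition symmetric to the tgi one:

```yaml
# Sketch of helm-charts/common/lvm-uservice/Chart.yaml dependencies.
# The lvm-serving condition is an assumption; that line is not visible
# in this condensed diff.
dependencies:
  - name: tgi
    version: 1.0.0
    repository: file://../tgi
    condition: tgi.enabled            # pulled in only when tgi.enabled=true
  - name: lvm-serving
    version: 1.0.0
    repository: file://../lvm-serving
    condition: lvm-serving.enabled    # assumed, by symmetry with the tgi entry
```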
69 changes: 59 additions & 10 deletions helm-charts/common/lvm-uservice/README.md
@@ -2,6 +2,37 @@

**Helm chart for deploying lvm-uservice microservice.**

There are two variants of `lvm-uservice`: one runs with the `tgi` service, and the other runs with the `lvm-serving` service. Both setups are covered in the following sections.

## 1. Installing lvm-uservice to be used with tgi microservice

In this setup, `lvm-uservice` depends on TGI, so you should set `LVM_ENDPOINT` to the tgi endpoint.

### (Option1): Installing the chart separately

First, you need to install the tgi chart; refer to the [tgi](../tgi) chart for more information.

After you've deployed the tgi chart successfully, please run `kubectl get svc` to get the tgi service endpoint, i.e. `http://tgi`.

Review comment (Collaborator), suggested change: "deployted" -> "deployed".
Review reply (Author): This is fixed now. This also came in from VisualQnA PR merge.

To install the chart, run the following:

```bash
cd GenAIInfra/helm-charts/common/lvm-uservice
export HFTOKEN="insert-your-huggingface-token-here"
export LVM_ENDPOINT="http://tgi"
helm dependency update
helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LVM_ENDPOINT=${LVM_ENDPOINT} --wait
```
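Before querying the service, you can confirm the release is up. A quick check, assuming the release name `lvm-uservice` yields a deployment and service of the same name:

```bash
# Wait until the lvm-uservice deployment reports all replicas ready
kubectl rollout status deployment/lvm-uservice --timeout=300s
# Confirm the service was created
kubectl get svc lvm-uservice
```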

### (Option2): Installing the chart with dependencies automatically (auto-installing tgi)

```bash
cd GenAIInfra/helm-charts/common/lvm-uservice
export HFTOKEN="insert-your-huggingface-token-here"
helm dependency update
helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set tgi.enabled=true --wait
```

## 2. Installing lvm-uservice to be used with lvm-serving microservice (serving VideoLlama-7B)

This setup of `lvm-uservice` is used in examples such as [VideoQnA](https://github.com/opea-project/GenAIExamples/tree/main/VideoQnA). Here, `lvm-uservice` communicates with the `lvm-serving` microservice: it forwards queries to it and relays the responses. It therefore depends on `lvm-serving`, and you should make sure the `lvmEndpoint` value is set properly.
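As a sketch, the endpoint is simply the in-cluster URL of the `lvm-serving` service; the service name below is an assumption and depends on the release name used for the `lvm-serving` chart (the configmap template later in this diff defaults to `<release>-lvm-serving`):

```bash
# List services and point lvm-uservice at the lvm-serving one
# (the name "lvm-serving" is assumed; check `kubectl get svc` for yours)
kubectl get svc
export LVM_ENDPOINT="http://lvm-serving"
```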
@@ -23,10 +23,10 @@
export https_proxy="your_https_proxy"
export http_proxy="your_http_proxy"

helm dependency update
-helm install lvm-uservice . --set lvmEndpoint=${LVM_ENDPOINT} --set global.https_proxy=${https_proxy} --set global.http_proxy=${http_proxy} --wait
+helm install lvm-uservice . -f ./variant_videoqna-values.yaml --set lvmEndpoint=${LVM_ENDPOINT} --set global.https_proxy=${https_proxy} --set global.http_proxy=${http_proxy} --wait
```

-### (Option2): Installing the chart with dependencies automatically (lvm-serving dependency)
+### (Option2): Installing the chart with dependencies automatically (auto-installing lvm-serving)

```bash
cd GenAIInfra/helm-charts/common/lvm-uservice
@@ -44,29 +44,47 @@
export https_proxy="your_https_proxy"
export http_proxy="your_http_proxy"

helm dependency update
-helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set lvm-serving.enabled=true --set lvm-serving.llmDownload=${LLM_DOWNLOAD} --set global.modelUseHostPath=${MODELDIR} --set global.cacheUseHostPath=${CACHEDIR} --set global.https_proxy=${https_proxy} --set global.http_proxy=${http_proxy} --wait
+helm install lvm-uservice . -f ./variant_videoqna-values.yaml --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set lvm-serving.enabled=true --set lvm-serving.llmDownload=${LLM_DOWNLOAD} --set global.modelUseHostPath=${MODELDIR} --set global.cacheUseHostPath=${CACHEDIR} --set global.https_proxy=${https_proxy} --set global.http_proxy=${http_proxy} --wait
```
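The command above references a few variables whose `export` lines are folded out of this view. Hypothetical values, for illustration only:

```bash
# Illustrative values (assumptions, not the chart's documented defaults)
export LLM_DOWNLOAD="True"          # whether lvm-serving should download the model
export MODELDIR="/mnt/opea-models"  # host path mounted for model storage
export CACHEDIR="/mnt/opea-cache"   # host path mounted for the cache
```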

## Verify

To verify the installation, run the command `kubectl get pod` to make sure all pods are running.

-Then run the command `kubectl port-forward svc/lvm-uservice 9000:9000` to expose the lvm-uservice service for access.
+### For TGI-based lvm-uservice
+
+Run the command `kubectl port-forward svc/lvm-uservice 9399:9399` to expose the lvm-uservice service for access.
+
+### For lvm-serving-based lvm-uservice
+
+Run the command `kubectl port-forward svc/lvm-uservice 9000:9000` to expose the lvm-uservice service for access.


Open another terminal and run the following command to verify that the service is working:

### Verify lvm-uservice running with lvm-serving (Video-Llama 7B) service

```bash
curl http://localhost:9000/v1/lvm \
-X POST \
-d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' \
-H 'Content-Type: application/json'
```

### Verify lvm-uservice running with TGI service

```bash
curl http://localhost:9399/v1/chat/completions \
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```

## Values

-| Key                             | Type   | Default                  | Description                     |
-| ------------------------------- | ------ | ------------------------ | ------------------------------- |
-| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                     | Your own Hugging Face API token |
-| image.repository                | string | `"opea/lvm-video-llama"` |                                 |
-| service.port                    | string | `"9000"`                 |                                 |
-| lvmEndpoint                     | string | `""`                     | LVM Serving endpoint            |
+| Key                             | Type   | Default                  | Description                     |
+| ------------------------------- | ------ | ------------------------ | ------------------------------- |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                     | Your own Hugging Face API token |
+| image.repository                | string | `"opea/lvm-tgi"`         |                                 |
+| videoqna: image.repository      | string | `"opea/lvm-video-llama"` |                                 |
+| service.port                    | string | `"9000"`                 |                                 |
+| LVM_ENDPOINT                    | string | `""`                     | LVM endpoint                    |
+| global.monitoring               | bool   | false                    | Service usage metrics           |
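The `videoqna:`-prefixed row refers to overrides carried in `variant_videoqna-values.yaml`. Its exact contents are not shown in this diff, but a plausible sketch based on the table above is:

```yaml
# variant_videoqna-values.yaml (sketch inferred from the values table;
# not the file's verbatim contents)
image:
  repository: opea/lvm-video-llama
service:
  port: 9000
  containerPort: 9000
```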
2 changes: 1 addition & 1 deletion helm-charts/common/lvm-uservice/ci-values.yaml
@@ -3,5 +3,5 @@

# Default values for lvm-uservice.

-lvm-serving:
+tgi:
enabled: true
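CI can reuse this file directly. A minimal sketch of such a run, assuming `HFTOKEN` is set as in the README examples:

```bash
# Install with the CI defaults, which enable the tgi backend
cd GenAIInfra/helm-charts/common/lvm-uservice
helm dependency update
helm install lvm-uservice . -f ci-values.yaml \
  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait
```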
7 changes: 6 additions & 1 deletion helm-charts/common/lvm-uservice/templates/configmap.yaml
@@ -15,9 +15,14 @@ data:
{{- else if contains "lvm-video-llama" .Values.image.repository }}
LVM_ENDPOINT: "http://{{ .Release.Name }}-lvm-serving"
{{- end }}
HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
HF_HOME: "/tmp/.cache/huggingface"
{{- if .Values.global.HF_ENDPOINT }}
HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
{{- end }}
http_proxy: {{ .Values.global.http_proxy | quote }}
https_proxy: {{ .Values.global.https_proxy | quote }}
-{{- if and (not .Values.TGI_LLM_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
+{{- if and (not .Values.LVM_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
no_proxy: "{{ .Release.Name }}-tgi,{{ .Release.Name }}-lvm-serving,{{ .Values.global.no_proxy }}"
{{- else }}
no_proxy: {{ .Values.global.no_proxy | quote }}
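Since `LVM_ENDPOINT` is derived from the image repository when it is not set explicitly, rendering the chart offline is a quick way to check which endpoint and `no_proxy` entries a given values combination produces. A sketch (the release name here is arbitrary):

```bash
# Render the templates locally and inspect the derived configmap values
helm dependency update
helm template lvm-uservice . \
  --set image.repository=opea/lvm-video-llama \
  | grep -E 'LVM_ENDPOINT|no_proxy'
```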
36 changes: 24 additions & 12 deletions helm-charts/common/lvm-uservice/templates/deployment.yaml
@@ -19,10 +19,7 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "lvm-uservice.labels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- include "lvm-uservice.selectorLabels" . | nindent 8 }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
@@ -45,23 +42,29 @@
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
-- name: http
+- name: lvm-uservice
containerPort: {{ .Values.service.containerPort }}
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
{{- if .Values.livenessProbe }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
{{- end }}
{{- if .Values.readinessProbe }}
readinessProbe:
{{- toYaml .Values.readinessProbe | nindent 12 }}
{{- end }}
{{- if .Values.startupProbe }}
startupProbe:
{{- toYaml .Values.startupProbe | nindent 12 }}
{{- end }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- with .Values.volumeMounts }}
volumeMounts:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumes }}
volumes:
{{- toYaml . | nindent 8 }}
{{- end }}
- name: tmp
emptyDir: {}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
@@ -74,3 +77,12 @@
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if .Values.evenly_distributed }}
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
{{- include "lvm-uservice.selectorLabels" . | nindent 14 }}
{{- end }}
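The spread constraint is opt-in through the `evenly_distributed` value; for example, to schedule replicas across nodes on a best-effort basis:

```bash
# Best-effort spreading of lvm-uservice replicas across nodes
helm install lvm-uservice . \
  --set evenly_distributed=true --set replicaCount=2 --wait
```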
2 changes: 1 addition & 1 deletion helm-charts/common/lvm-uservice/templates/service.yaml
@@ -13,6 +13,6 @@
- port: {{ .Values.service.port }}
targetPort: {{ .Values.service.containerPort }}
protocol: TCP
-name: http
+name: lvm-uservice
selector:
{{- include "lvm-uservice.selectorLabels" . | nindent 4 }}
@@ -9,6 +9,7 @@ metadata:
{{- include "lvm-uservice.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": test
#"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
spec:
containers:
- name: curl
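Because the pod above carries the `helm.sh/hook: test` annotation, it can be exercised after installation with Helm's built-in test command:

```bash
# Runs the chart's test hook pod(s) against the installed release
helm test lvm-uservice
```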
98 changes: 98 additions & 0 deletions helm-charts/common/lvm-uservice/values.yaml
@@ -2,6 +2,104 @@
# SPDX-License-Identifier: Apache-2.0

# Default values for lvm-uservice.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

tgi:
enabled: false
LLM_MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf
MAX_INPUT_LENGTH: "4096"
MAX_TOTAL_TOKENS: "8192"
lvm-serving:
enabled: false
llmDownload: true

replicaCount: 1
LVM_ENDPOINT: ""

# Set it as a non-null string, such as true, if you want to enable logging facility,
# otherwise, keep it as "" to disable it.
LOGFLAG: ""

image:
repository: opea/lvm-tgi
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "latest"

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

podAnnotations: {}

podSecurityContext: {}
# fsGroup: 2000

securityContext:
readOnlyRootFilesystem: false
allowPrivilegeEscalation: false
runAsNonRoot: true
runAsUser: 1000
capabilities:
drop:
- ALL
seccompProfile:
type: RuntimeDefault

service:
type: ClusterIP
# The default port for lvm service is 9399
port: 9399
containerPort: 9399

resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi

livenessProbe:
httpGet:
path: v1/health_check
port: lvm-uservice
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 24
readinessProbe:
httpGet:
path: v1/health_check
port: lvm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
httpGet:
path: v1/health_check
port: lvm-uservice
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120

nodeSelector: {}

tolerations: []

affinity: {}

global:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"

# Install Prometheus serviceMonitor for service
monitoring: false

# Prometheus Helm install release name for serviceMonitor
prometheusRelease: prometheus-stack
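A sketch of enabling the metrics path, assuming a kube-prometheus-stack release named `prometheus-stack` is already running in the cluster:

```bash
# Create the ServiceMonitor and tie it to the named Prometheus release
helm install lvm-uservice . \
  --set global.monitoring=true \
  --set global.prometheusRelease=prometheus-stack --wait
```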
You are viewing a condensed version of this merge commit.