diff --git a/helm-charts/chatqna/gaudi-values.yaml b/helm-charts/chatqna/gaudi-values.yaml index 78c15d2d..14385014 100644 --- a/helm-charts/chatqna/gaudi-values.yaml +++ b/helm-charts/chatqna/gaudi-values.yaml @@ -1,34 +1,6 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Default values for chatqna. -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. - -replicaCount: 1 - -image: - repository: opea/chatqna:latest - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. - # tag: "1.0" - -port: 8888 -service: - type: ClusterIP - port: 8888 - -securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 1000 - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault - tei: image: repository: ghcr.io/huggingface/tei-gaudi @@ -39,22 +11,14 @@ tei: # To override values in subchart tgi tgi: - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B image: repository: ghcr.io/huggingface/tgi-gaudi tag: "2.0.1" resources: limits: habana.ai/gaudi: 1 - -global: - http_proxy: - https_proxy: - no_proxy: - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - LANGCHAIN_TRACING_V2: false - LANGCHAIN_API_KEY: "insert-your-langchain-key-here" - # set modelUseHostPath to host directory if you want to use hostPath volume for model storage - # comment out modeluseHostPath if you want to download the model from huggingface - modelUseHostPath: /mnt/opea-models + extraArgs: + - "--max-input-length" + - "1024" + - "--max-total-tokens" + - "2048" diff --git a/helm-charts/common/retriever-usvc/templates/deployment.yaml b/helm-charts/common/retriever-usvc/templates/deployment.yaml index ffc6029b..63813dd0 100644 --- a/helm-charts/common/retriever-usvc/templates/deployment.yaml +++ b/helm-charts/common/retriever-usvc/templates/deployment.yaml @@ -48,19 +48,17 @@ spec: 
volumeMounts: - mountPath: /tmp name: tmp - {{- if not .Values.noProbe }} + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} startupProbe: - exec: - command: - - curl - {{- if .Values.TEI_EMBEDDING_ENDPOINT }} - - {{ .Values.TEI_EMBEDDING_ENDPOINT }} - {{- else }} - - http://{{ .Release.Name }}-tei - {{- end }} - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 120 + {{- toYaml .Values.startupProbe | nindent 12 }} {{- end }} resources: {{- toYaml .Values.resources | nindent 12 }} diff --git a/helm-charts/common/retriever-usvc/templates/tests/test-pod.yaml b/helm-charts/common/retriever-usvc/templates/tests/test-pod.yaml index 264f1415..0514b280 100644 --- a/helm-charts/common/retriever-usvc/templates/tests/test-pod.yaml +++ b/helm-charts/common/retriever-usvc/templates/tests/test-pod.yaml @@ -15,12 +15,17 @@ spec: - name: curl #image: alpine/curl image: python:3.10.14 - command: ['sh', '-c'] + command: ['bash', '-c'] args: - | your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)"); - curl http://{{ include "retriever-usvc.fullname" . }}:{{ .Values.service.port }}/v1/retrieval -sS --fail-with-body \ + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "retriever-usvc.fullname" . 
}}:{{ .Values.service.port }}/v1/retrieval -sS --fail-with-body \ -X POST \ -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ -H 'Content-Type: application/json' + -H 'Content-Type: application/json' && break; + sleep 10; + done; + if [ $i -gt $max_retry ]; then echo "retriever test failed."; exit 1; fi restartPolicy: Never diff --git a/helm-charts/common/retriever-usvc/values.yaml b/helm-charts/common/retriever-usvc/values.yaml index 380f658d..4eb55e7b 100644 --- a/helm-charts/common/retriever-usvc/values.yaml +++ b/helm-charts/common/retriever-usvc/values.yaml @@ -55,6 +55,27 @@ resources: {} # cpu: 100m # memory: 128Mi +livenessProbe: + httpGet: + path: /v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: /v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: /v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + nodeSelector: {} tolerations: [] diff --git a/helm-charts/common/tgi/nv-values.yaml b/helm-charts/common/tgi/nv-values.yaml index 0b69debe..507001a3 100644 --- a/helm-charts/common/tgi/nv-values.yaml +++ b/helm-charts/common/tgi/nv-values.yaml @@ -5,56 +5,10 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. -replicaCount: 1 - -port: 2080 - image: repository: ghcr.io/huggingface/text-generation-inference - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. 
tag: "2.0" -imagePullSecrets: [] -nameOverride: "" -fullnameOverride: "" - -podAnnotations: {} - -podSecurityContext: {} - # fsGroup: 2000 - -securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 1000 - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault - -service: - type: ClusterIP - resources: limits: nvidia.com/gpu: 1 - -nodeSelector: {} - -tolerations: [] - -affinity: {} - -LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - -global: - http_proxy: "" - https_proxy: "" - no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - # set modelUseHostPath to host directory if you want to use hostPath volume for model storage - # comment out modeluseHostPath if you want to download the model from huggingface - modelUseHostPath: /mnt/opea-models diff --git a/manifests/common/retriever-usvc.yaml b/manifests/common/retriever-usvc.yaml index 0d53cb60..9ec7fb09 100644 --- a/manifests/common/retriever-usvc.yaml +++ b/manifests/common/retriever-usvc.yaml @@ -106,6 +106,26 @@ spec: volumeMounts: - mountPath: /tmp name: tmp + livenessProbe: + failureThreshold: 24 + httpGet: + path: /v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + startupProbe: + failureThreshold: 120 + httpGet: + path: /v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 resources: {} volumes: diff --git a/manifests/common/tgi_nv.yaml b/manifests/common/tgi_nv.yaml index 857b59b3..ff83ac88 100644 --- a/manifests/common/tgi_nv.yaml +++ b/manifests/common/tgi_nv.yaml @@ -16,10 +16,7 @@ metadata: data: MODEL_ID: "Intel/neural-chat-7b-v3-3" PORT: "2080" - HUGGING_FACE_HUB_TOKEN: "insert-your-huggingface-token-here" HF_TOKEN: "insert-your-huggingface-token-here" - MAX_INPUT_TOKENS: "1024" - MAX_TOTAL_TOKENS: "4096" http_proxy: "" 
https_proxy: "" no_proxy: "" @@ -102,6 +99,23 @@ spec: - name: http containerPort: 2080 protocol: TCP + livenessProbe: + failureThreshold: 24 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http + startupProbe: + failureThreshold: 120 + initialDelaySeconds: 5 + periodSeconds: 5 + tcpSocket: + port: http resources: limits: nvidia.com/gpu: 1