Skip to content

Commit

Permalink
Adapt latest changes for data-prep
Browse files Browse the repository at this point in the history
Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
  • Loading branch information
lianhao committed Jan 13, 2025
1 parent 7dfe752 commit aac5501
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 85 deletions.
2 changes: 2 additions & 0 deletions helm-charts/common/data-prep/.helmignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@
.idea/
*.tmproj
.vscode/
# CI values
ci*-values.yaml
20 changes: 10 additions & 10 deletions helm-charts/common/data-prep/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ To install data-prep chart, run the following:

```console
cd GenAIInfra/helm-charts/common/data-prep
export REDIS_URL="redis://redis-vector-db:6379"
export REDIS_HOST="redis-vector-db"
export TEI_EMBEDDING_ENDPOINT="http://tei"
helm dependency update
helm install data-prep . --set REDIS_URL=${REDIS_URL} --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT}
helm install data-prep . --set REDIS_HOST=${REDIS_HOST} --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT}
```

## (Option2): Installing the chart with dependencies automatically
Expand All @@ -38,21 +38,21 @@ Then run the command `kubectl port-forward svc/data-prep 6007:6007` to expose th
Open another terminal and run the following command to verify the service is working:

```console
curl http://localhost:6007/v1/dataprep \
curl http://localhost:6007/v1/dataprep/ingest \
-X POST \
-H "Content-Type: multipart/form-data" \
-F "files=@./README.md"
```

## Values

| Key | Type | Default | Description |
| ---------------------- | ------ | ----------------------- | ---------------------------------------- |
| image.repository | string | `"opea/dataprep-redis"` | |
| service.port | string | `"6007"` | |
| REDIS_URL | string | `""` | |
| TEI_EMBEDDING_ENDPOINT | string | `""` | |
| global.monitoring | bool | `false` | See ../../monitoring.md before enabling! |
| Key | Type | Default | Description |
| ----------------------- | ------ | ----------------------- | ---------------------------------------- |
| image.repository | string | `"opea/dataprep-redis"` | |
| service.port | string | `"6007"` | |
| DATAPREP_COMPONENT_NAME | string | `"OPEA_DATAPREP_REDIS"` | vector DB backend |
| TEI_EMBEDDING_ENDPOINT | string | `""` | |
| global.monitoring | bool | `false` | See ../../monitoring.md before enabling! |

## Milvus support

Expand Down
11 changes: 3 additions & 8 deletions helm-charts/common/data-prep/milvus-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,10 @@ redis-vector-db:
tei:
enabled: true

image:
repository: opea/dataprep-milvus

port: 6010
# text embedding inference service URL, e.g. http://<service-name>:<port>
#TEI_EMBEDDING_ENDPOINT: "http://embedding-tei:80"
# TEI_EMBEDDING_ENDPOINT: "http://data-prep-tei:80"
# milvus DB configurations
#MILVUS_HOST: "milvustest"
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MILVUS"
# MILVUS_HOST: "data-prep-milvus"
MILVUS_PORT: "19530"
COLLECTION_NAME: "rag_milvus"
MOSEC_EMBEDDING_ENDPOINT: ""
MOSEC_EMBEDDING_MODEL: ""
32 changes: 17 additions & 15 deletions helm-charts/common/data-prep/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,36 @@ metadata:
labels:
{{- include "data-prep.labels" . | nindent 4 }}
data:
{{- if .Values.MOSEC_EMBEDDING_ENDPOINT }}
MOSEC_EMBEDDING_ENDPOINT: {{ .Values.MOSEC_EMBEDDING_ENDPOINT | quote}}
MOSEC_EMBEDDING_MODEL: {{ .Values.MOSEC_EMBEDDING_MODEL | quote}}
{{- else if .Values.TEI_EMBEDDING_ENDPOINT }}
TEI_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote}}
TEI_EMBEDDING_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote}}
{{- if .Values.TEI_EMBEDDING_ENDPOINT }}
TEI_EMBEDDING_ENDPOINT: {{ tpl .Values.TEI_EMBEDDING_ENDPOINT . | quote}}
{{- else if not .Values.LOCAL_EMBEDDING_MODEL }}
TEI_ENDPOINT: "http://{{ .Release.Name }}-tei"
TEI_EMBEDDING_ENDPOINT: "http://{{ .Release.Name }}-tei"
{{- end }}
{{- if .Values.LOCAL_EMBEDDING_MODEL }}
EMBED_MODEL: {{ .Values.LOCAL_EMBEDDING_MODEL | quote }}
LOCAL_EMBEDDING_MODEL: {{ .Values.LOCAL_EMBEDDING_MODEL | quote }}
{{- end }}
{{- if .Values.REDIS_URL }}
REDIS_URL: {{ .Values.REDIS_URL | quote}}
DATAPREP_COMPONENT_NAME: {{ .Values.DATAPREP_COMPONENT_NAME | quote }}
{{- if eq .Values.DATAPREP_COMPONENT_NAME "OPEA_DATAPREP_REDIS" }}
{{- if .Values.REDIS_HOST }}
REDIS_HOST: {{ tpl .Values.REDIS_HOST . | quote}}
{{- else }}
REDIS_URL: "redis://{{ .Release.Name }}-redis-vector-db:6379"
REDIS_HOST: "{{ .Release.Name }}-redis-vector-db"
{{- end }}
REDIS_PORT: {{ .Values.REDIS_PORT | quote }}
REDIS_SSL: {{ .Values.REDIS_SSL | quote }}
REDIS_PASSWORD: {{ .Values.REDIS_PASSWORD | quote }}
REDIS_USERNAME: {{ .Values.REDIS_USERNAME | quote }}
INDEX_NAME: {{ .Values.INDEX_NAME | quote }}
KEY_INDEX_NAME: {{ .Values.KEY_INDEX_NAME | quote }}
SEARCH_BATCH_SIZE: {{ .Values.SEARCH_BATCH_SIZE | quote }}
{{- else if eq .Values.DATAPREP_COMPONENT_NAME "OPEA_DATAPREP_MILVUS" }}
{{- if .Values.MILVUS_HOST }}
MILVUS_HOST: {{ .Values.MILVUS_HOST | quote }}
MILVUS_HOST: {{ tpl .Values.MILVUS_HOST . | quote }}
{{- else }}
MILVUS_HOST: "{{ .Release.Name }}-milvus"
{{- end }}
MILVUS: {{ .Values.MILVUS_HOST | quote }}
MILVUS_PORT: {{ .Values.MILVUS_PORT | quote }}
{{- if .Values.COLLECTION_NAME }}
COLLECTION_NAME: {{ .Values.COLLECTION_NAME | quote }}
{{- end }}
HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
Expand All @@ -46,9 +47,10 @@ data:
{{- end }}
http_proxy: {{ .Values.global.http_proxy | quote }}
https_proxy: {{ .Values.global.https_proxy | quote }}
{{- if and (not .Values.REDIS_URL) (and (not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy)) }}
no_proxy: "{{ .Release.Name }}-tei,{{ .Release.Name }}-redis-vector-db,{{ .Values.global.no_proxy }}"
{{- if and (not .Values.REDIS_HOST) (and (not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy)) }}
no_proxy: "{{ .Release.Name }}-tei,{{ .Release.Name }}-redis-vector-db,{{ .Release.Name }}-milvus,{{ .Values.global.no_proxy }}"
{{- else }}
no_proxy: {{ .Values.global.no_proxy | quote }}
{{- end }}
LOGFLAG: {{ .Values.LOGFLAG | quote }}
RETRY_COUNT: {{ .Values.retryCount | default 60 | quote }}
30 changes: 29 additions & 1 deletion helm-charts/common/data-prep/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,36 @@ spec:
serviceAccountName: {{ include "data-prep.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
initContainers:
- name: wait-for-db
envFrom:
- configMapRef:
name: {{ include "data-prep.fullname" . }}-config
{{- if .Values.global.extraEnvConfig }}
- configMapRef:
name: {{ .Values.global.extraEnvConfig }}
optional: true
{{- end }}
image: busybox:1.36
command: ["sh", "-c"]
args:
- |
{{- if eq .Values.DATAPREP_COMPONENT_NAME "OPEA_DATAPREP_REDIS" }}
TESTHOST=$(REDIS_HOST);
TESTPORT=$(REDIS_PORT);
{{- else if eq .Values.DATAPREP_COMPONENT_NAME "OPEA_DATAPREP_MILVUS" }}
TESTHOST=$(MILVUS_HOST);
TESTPORT=$(MILVUS_PORT);
{{- end }}
retry_count=$(RETRY_COUNT);
j=1;
while ! nc -z ${TESTHOST} ${TESTPORT}; do
[[ $j -ge ${retry_count} ]] && echo "ERROR: ${TESTHOST}:${TESTPORT} is NOT reachable in $j seconds!" && exit 1;
j=$((j+1)); sleep 1;
done;
echo "${TESTHOST}:${TESTPORT} is reachable within $j seconds.";
containers:
- name: {{ .Release.Name }}
- name: {{ .Chart.Name }}
envFrom:
- configMapRef:
name: {{ include "data-prep.fullname" . }}-config
Expand Down
17 changes: 12 additions & 5 deletions helm-charts/common/data-prep/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,24 @@ spec:
- |
echo "test file" > /tmp/file1.txt;
max_retry=20;
echo "test upload...";
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "data-prep.fullname" . }}:{{ .Values.service.port }}/v1/dataprep -sS --fail-with-body \
curl http://{{ include "data-prep.fullname" . }}:{{ .Values.service.port }}/v1/dataprep/ingest -sS --fail-with-body \
-X POST \
-H "Content-Type: multipart/form-data" \
-F "files=@/tmp/file1.txt" && break;
curlcode=$?
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
curl http://{{ include "data-prep.fullname" . }}:{{ .Values.service.port }}/v1/dataprep/delete_file -sS \
-X POST \
-H "Content-Type: application/json" \
-d '{"file_path": "file1.txt"}';
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
echo "test delete...";
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "data-prep.fullname" . }}:{{ .Values.service.port }}/v1/dataprep/delete -sS --fail-with-body \
-X POST \
-H "Content-Type: application/json" \
-d '{"file_path": "file1.txt"}' && break;
curlcode=$?
if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
done;
if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
restartPolicy: Never
88 changes: 42 additions & 46 deletions helm-charts/common/data-prep/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,39 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

tei:
enabled: false
milvus:
enabled: false
redis-vector-db:
enabled: false
# Configurations for OPEA microservice data-prep
# Set it as a non-null string, such as true, if you want to enable logging.
LOGFLAG: ""

# data-prep needs to talk to different vector-DB backend services, e.g. redis, milvus
# Default is to use redis as vector-DB backend
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_REDIS"
REDIS_HOST: ""
REDIS_PORT: 6379
REDIS_SSL: false
REDIS_PASSWORD: ""
REDIS_USERNAME: ""
INDEX_NAME: "rag_redis"
KEY_INDEX_NAME: "file-keys"
SEARCH_BATCH_SIZE: 10

# Uncomment and set the following settings to use milvus as vector-DB backend
# DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MILVUS"
# MILVUS_HOST: ""
# MILVUS_PORT: 19530
# COLLECTION_NAME: "rag_milvus"

# data-prep can do embedding locally or talk to a remote TEI service to do embedding
# The following embedding related settings are listed in precedence order
# text embedding inference service URL, e.g. http://<service-name>:<port>
TEI_EMBEDDING_ENDPOINT: ""
# local embedding model
LOCAL_EMBEDDING_MODEL: ""

replicaCount: 1

image:
repository: opea/dataprep-redis
repository: opea/dataprep
# Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
# pullPolicy: ""
# Overrides the image tag whose default is the chart appVersion.
Expand Down Expand Up @@ -52,30 +74,19 @@ securityContext:
seccompProfile:
type: RuntimeDefault

port: 6007
port: 5000
service:
type: ClusterIP
port: 6007

resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
resources:
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
requests:
cpu: 100m
memory: 128Mi

livenessProbe:
httpGet:
path: v1/health_check
port: data-prep
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 24
readinessProbe:
httpGet:
path: v1/health_check
Expand All @@ -96,29 +107,6 @@ tolerations: []

affinity: {}

# Set it as a non-null string, such as true, if you want to enable logging facility,
# otherwise, keep it as "" to disable it.
LOGFLAG: ""

# text embedding inference service URL, e.g. http://<service-name>:<port>
TEI_EMBEDDING_ENDPOINT: ""

# local embedder's model
LOCAL_EMBEDDING_MODEL: ""

# redis DB service URL, e.g. redis://<service-name>:<port>
REDIS_URL: ""
INDEX_NAME: "rag-redis"
KEY_INDEX_NAME: "file-keys"
SEARCH_BATCH_SIZE: 10

# milvus DB configurations
MILVUS_HOST: ""
MILVUS_PORT: ""
COLLECTION_NAME: ""
MOSEC_EMBEDDING_ENDPOINT: ""
MOSEC_EMBEDDING_MODEL: ""

global:
http_proxy: ""
https_proxy: ""
Expand All @@ -134,3 +122,11 @@ global:

# Prometheus Helm install release name needed for serviceMonitors
prometheusRelease: prometheus-stack

# For CI tests only
tei:
enabled: false
milvus:
enabled: false
redis-vector-db:
enabled: false

0 comments on commit aac5501

Please sign in to comment.