@@ -7,31 +7,64 @@ export WORKPATH=$(dirname "$PWD")
export WORKDIR=$WORKPATH/../../
echo "WORKDIR=${WORKDIR}"
export IP_ADDRESS=$(hostname -I | awk '{print $1}')
+ export HOST_IP=${IP_ADDRESS}
LOG_PATH=$WORKPATH

- #### env vars for LLM endpoint #############
+ # Proxy settings
+ export NO_PROXY="${NO_PROXY},${HOST_IP}"
+ export HTTP_PROXY="${http_proxy}"
+ export HTTPS_PROXY="${https_proxy}"
+
+ # VLLM configuration
MODEL=meta-llama/Llama-3.3-70B-Instruct
- VLLM_IMAGE=opea/vllm-gaudi:latest
- VLLM_PORT=8086
- HF_CACHE_DIR=${model_cache:-"/data2/huggingface"}
- VLLM_VOLUME=${HF_CACHE_DIR}
- #######################################
+ export VLLM_PORT="${VLLM_PORT:-8086}"
+
+ # export HF_CACHE_DIR="${HF_CACHE_DIR:-"./data"}"
+ export HF_CACHE_DIR=${model_cache:-"./data2/huggingface"}
+ export VLLM_VOLUME="${HF_CACHE_DIR:-"./data2/huggingface"}"
+ export VLLM_IMAGE="${VLLM_IMAGE:-opea/vllm-gaudi:latest}"
+ export LLM_MODEL_ID="${LLM_MODEL_ID:-meta-llama/Llama-3.3-70B-Instruct}"
+ export LLM_MODEL=$LLM_MODEL_ID
+ export LLM_ENDPOINT="http://${IP_ADDRESS}:${VLLM_PORT}"
+ export MAX_LEN="${MAX_LEN:-16384}"
+ export NUM_CARDS="${NUM_CARDS:-4}"
+
+ # Recursion limits
+ export RECURSION_LIMIT_WORKER="${RECURSION_LIMIT_WORKER:-12}"
+ export RECURSION_LIMIT_SUPERVISOR="${RECURSION_LIMIT_SUPERVISOR:-10}"
+
+ # Hugging Face API token
+ export HUGGINGFACEHUB_API_TOKEN="${HF_TOKEN}"
+
+ # LLM configuration
+ export TEMPERATURE="${TEMPERATURE:-0.5}"
+ export MAX_TOKENS="${MAX_TOKENS:-4096}"
+ export MAX_INPUT_TOKENS="${MAX_INPUT_TOKENS:-2048}"
+ export MAX_TOTAL_TOKENS="${MAX_TOTAL_TOKENS:-4096}"
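Because `LLM_ENDPOINT` points at vLLM's OpenAI-compatible server, a one-line smoke test can confirm the endpoint once `start_vllm_service_70B` (below) has run. A minimal sketch, assuming this script has been executed in the current shell so the variables are set:

```bash
# Hedged smoke test: vLLM exposes an OpenAI-compatible /v1/models route.
curl -s "${LLM_ENDPOINT}/v1/models" | grep -q "Llama-3.3-70B-Instruct" \
    && echo "vLLM endpoint is up and serving the expected model"
```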
+
+ # Worker URLs
+ export WORKER_FINQA_AGENT_URL="http://${IP_ADDRESS}:9095/v1/chat/completions"
+ export WORKER_RESEARCH_AGENT_URL="http://${IP_ADDRESS}:9096/v1/chat/completions"
+
+ # DocSum configuration
+ export DOCSUM_COMPONENT_NAME="${DOCSUM_COMPONENT_NAME:-"OpeaDocSumvLLM"}"
+ export DOCSUM_ENDPOINT="http://${IP_ADDRESS}:9000/v1/docsum"
+
+ # Toolset and prompt paths
+ export TOOLSET_PATH=$WORKDIR/GenAIExamples/FinanceAgent/tools/
+ export PROMPT_PATH=$WORKDIR/GenAIExamples/FinanceAgent/prompts/

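The worker URLs end in `/v1/chat/completions`, which suggests the agent microservices accept chat-style requests. A hedged probe of the FinQA worker; the `messages`/`stream` payload shape is an assumption based on the URL, not something this script confirms:

```bash
# Assumed request schema; adjust if the worker expects a different payload.
curl -s -X POST "${WORKER_FINQA_AGENT_URL}" \
    -H "Content-Type: application/json" \
    -d '{"messages": "What was the company revenue in 2024?", "stream": "false"}'
```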
#### env vars for dataprep #############
- export hOST_IP=${IP_ADDRESS}
export DATAPREP_PORT="6007"
export TEI_EMBEDDER_PORT="10221"
export REDIS_URL_VECTOR="redis://${IP_ADDRESS}:6379"
export REDIS_URL_KV="redis://${IP_ADDRESS}:6380"
- export LLM_MODEL=$MODEL
- export LLM_ENDPOINT="http://${IP_ADDRESS}:${VLLM_PORT}"
+
export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS_FINANCE"
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export TEI_EMBEDDING_ENDPOINT="http://${IP_ADDRESS}:${TEI_EMBEDDER_PORT}"
#######################################

-
-

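The dataprep flow depends on the TEI embedder being reachable. Assuming the standard Hugging Face text-embeddings-inference API (its `/embed` route takes an `inputs` field), a quick check might be:

```bash
# Probe the TEI embedder; should return a JSON array of embedding vectors.
curl -s -X POST "${TEI_EMBEDDING_ENDPOINT}/embed" \
    -H "Content-Type: application/json" \
    -d '{"inputs": "What was the quarterly revenue?"}'
```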
function get_genai_comps() {
    if [ ! -d "GenAIComps" ]; then
        git clone --depth 1 --branch ${opea_branch:-"main"} https://github.com/opea-project/GenAIComps.git
@@ -70,11 +103,10 @@ function build_vllm_docker_image() {
    fi
}

-
function start_vllm_service_70B() {
    echo "token is ${HF_TOKEN}"
    echo "start vllm gaudi service"
- echo "**************MODEL is $MODEL**************"
+ echo "**************MODEL is $LLM_MODEL_ID**************"
    docker run -d --runtime=habana --rm --name "vllm-gaudi-server" -e HABANA_VISIBLE_DEVICES=all -p $VLLM_PORT:8000 -v $VLLM_VOLUME:/data -e HF_TOKEN=$HF_TOKEN -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_HOME=/data -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e VLLM_SKIP_WARMUP=true --cap-add=sys_nice --ipc=host $VLLM_IMAGE --model ${MODEL} --max-seq-len-to-capture 16384 --tensor-parallel-size 4
    sleep 10s
    echo "Waiting vllm gaudi ready"
@@ -95,7 +127,6 @@ function start_vllm_service_70B() {
    echo "Service started successfully"
}
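After `start_vllm_service_70B` reports success, an end-to-end request exercises the full serving path. A sketch against vLLM's OpenAI-compatible chat route; note the container is launched with `--model ${MODEL}`, so that is the id the server expects:

```bash
# Hedged end-to-end check of the chat completions route.
curl -s -X POST "${LLM_ENDPOINT}/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d "{\"model\": \"${MODEL}\",
         \"messages\": [{\"role\": \"user\", \"content\": \"Say hello.\"}],
         \"max_tokens\": 32}"
```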

-
function stop_llm(){
    cid=$(docker ps -aq --filter "name=vllm-gaudi-server")
    echo "Stopping container $cid"
@@ -104,7 +135,17 @@ function stop_llm(){
}

function start_dataprep_and_agent(){
- docker compose -f $WORKPATH/docker_compose/intel/hpu/gaudi/compose.yaml up -d tei-embedding-serving redis-vector-db redis-kv-store dataprep-redis-finance worker-finqa-agent worker-research-agent docsum-vllm-gaudi supervisor-react-agent agent-ui
+ docker compose -f $WORKPATH/docker_compose/intel/hpu/gaudi/compose.yaml up -d \
+     tei-embedding-serving \
+     redis-vector-db \
+     redis-kv-store \
+     dataprep-redis-finance \
+     worker-finqa-agent \
+     worker-research-agent \
+     docsum-vllm-gaudi \
+     supervisor-react-agent \
+     agent-ui
+
    sleep 1m
}
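The fixed `sleep 1m` is a coarse readiness wait. A possible alternative is to poll container health instead; this sketch assumes the compose services define healthchecks and that container names match the service names, neither of which this script guarantees:

```bash
# Hypothetical readiness poll in place of a fixed sleep.
for svc in tei-embedding-serving redis-vector-db redis-kv-store; do
    for _ in $(seq 1 30); do
        status=$(docker inspect --format '{{.State.Health.Status}}' "$svc" 2>/dev/null)
        [ "$status" = "healthy" ] && break
        sleep 5
    done
done
```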
@@ -219,7 +260,6 @@ function stop_agent_docker() {
    done
}

-
echo "workpath: $WORKPATH"
echo "=================== Stop containers ===================="
stop_llm
@@ -232,9 +272,9 @@ echo "=================== #1 Building docker images===================="
build_vllm_docker_image
build_dataprep_agent_images

- #### for local test
+ ### for local test
# build_agent_image_local
- # echo "=================== #1 Building docker images completed===================="
+ echo "=================== #1 Building docker images completed===================="

echo "=================== #2 Start vllm endpoint===================="
start_vllm_service_70B