# docker-compose.yaml
# Forked from NVIDIA-AI-Blueprints/ai-virtual-assistant.
# Merge the service definitions from nims.yaml into this project.
# NOTE(review): services referenced below but not defined in this file
# (e.g. nemollm-embedding) are presumably declared there - confirm.
include:
  - path:
      - nims.yaml
services:
# =======================
# Agent Services
# =======================
  # Agent server container. It is started after both retrievers, postgres
  # and redis (see depends_on) and is published on host port 8081.
  agent-chain-server:
    container_name: agent-chain-server
    image: nvcr.io/nvidia/blueprint/aiva-customer-service-agent:1.1.0
    build:
      # Build context is the repository root
      context: ../../
      dockerfile: src/agent/Dockerfile
    command: --port 8081 --host 0.0.0.0 --workers 1 --loop asyncio
    environment:
      EXAMPLE_PATH: './src/agent'
      # APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-70b-instruct"}
      APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-405b-instruct"}
      APP_LLM_MODELENGINE: nvidia-ai-endpoints
      APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
      # Cache name to store user conversation
      # supported type inmemory, redis
      APP_CACHE_NAME: ${APP_CACHE_NAME:-"redis"}
      APP_CACHE_URL: ${APP_CACHE_URL:-"redis:6379"}
      # Database name to store user conversation
      # supported type postgres
      APP_DATABASE_NAME: ${APP_DATABASE_NAME:-"postgres"}
      APP_DATABASE_URL: ${APP_DATABASE_URL:-"postgres:5432"}
      # Checkpointer name to store intermediate state of conversation
      # supported type postgres, inmemory
      APP_CHECKPOINTER_NAME: ${APP_CHECKPOINTER_NAME:-"postgres"}
      APP_CHECKPOINTER_URL: ${APP_CHECKPOINTER_URL:-"postgres:5432"}
      # Postgres config
      POSTGRES_USER: ${POSTGRES_USER:-postgres}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password}
      POSTGRES_DB: ${POSTGRES_DB:-postgres}
      # Read-only Postgres credentials for customer data.
      # A dedicated POSTGRES_USER_READONLY / POSTGRES_PASSWORD_READONLY
      # override takes precedence, so the read-only account can be configured
      # independently of the admin account. When the dedicated variable is
      # unset, the previous resolution is kept unchanged
      # (POSTGRES_USER / POSTGRES_PASSWORD, else the read-only defaults).
      POSTGRES_USER_READONLY: ${POSTGRES_USER_READONLY:-${POSTGRES_USER:-postgres_readonly}}
      POSTGRES_PASSWORD_READONLY: ${POSTGRES_PASSWORD_READONLY:-${POSTGRES_PASSWORD:-readonly_password}}
      # Postgres database name for customer data
      CUSTOMER_DATA_DB: ${CUSTOMER_DATA_DB:-customer_data}
      CANONICAL_RAG_URL: http://unstructured-retriever:8081
      STRUCTURED_RAG_URI: http://structured-retriever:8081
      NVIDIA_API_KEY: ${NVIDIA_API_KEY}
      GRAPH_RECURSION_LIMIT: 20
      # GRAPH_TIMEOUT_IN_SEC: 20 # with meta/llama-3.1-70b-instruct
      GRAPH_TIMEOUT_IN_SEC: 50  # with meta/llama-3.1-405b-instruct
      RETURN_WINDOW_CURRENT_DATE: '2024-10-23'  # Leave it empty to get the current date
      RETURN_WINDOW_THRESHOLD_DAYS: 30
      # Log level for server, supported level NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL
      LOGLEVEL: ${LOGLEVEL:-INFO}
      # Default expiration time (TTL) for Redis keys.
      # NOTE(review): the original comments disagreed on the unit
      # ("in seconds" above, "in hours" inline); 12 reads as hours -
      # confirm against the code that consumes this variable.
      REDIS_SESSION_EXPIRY: 12  # in hours
    ports:
      - "8081:8081"
    expose:
      - "8081"
    shm_size: 5gb
    depends_on:
      - unstructured-retriever
      - structured-retriever
      - postgres
      - redis
# =======================
# UI and related services for Agent Interface
# =======================
  # API gateway container, published on host port 9000. It is configured
  # with the URLs of the agent and analytics servers and starts after the
  # agent server.
  api-gateway-server:
    image: nvcr.io/nvidia/blueprint/aiva-customer-service-api-gateway:1.1.0
    build:
      # Build context is the repository root
      context: ../../
      dockerfile: ./src/api_gateway/Dockerfile
    command: --port 9000 --host 0.0.0.0 --workers 1
    environment:
      AGENT_SERVER_URL: ${AGENT_SERVER_URL:-http://agent-chain-server:8081}
      ANALYTICS_SERVER_URL: ${ANALYTICS_SERVER_URL:-http://analytics-server:8081}
      # REQUEST_TIMEOUT: 320 # with meta/llama-3.1-70b-instruct
      REQUEST_TIMEOUT: 800  # with meta/llama-3.1-405b-instruct
    ports:
      - "9000:9000"
    depends_on:
      - agent-chain-server
    # Optional: automatically restart the container unless it is stopped
    restart: unless-stopped
# Agent UI container, published on host port 3001; INFERENCE_ORIGIN points it
# at the API gateway, which it is started after.
  agent-frontend:
    container_name: agent-frontend
    image: nvcr.io/nvidia/blueprint/aiva-customer-service-ui:1.1.0
    environment:
      INFERENCE_ORIGIN: http://api-gateway-server:9000
    ports:
      - "3001:3001"
    depends_on:
      - api-gateway-server
    # Optional: automatically restart the container unless it is stopped
    restart: unless-stopped
# =======================
# Analytics Services - summary/sentiment and similar APIs are exposed as part of analytics MS
# =======================
  # Analytics server container. Listens on 8081 inside the network and is
  # published on host port 8082; waits for postgres to report healthy.
  analytics-server:
    container_name: analytics-server
    image: nvcr.io/nvidia/blueprint/aiva-customer-service-analytics:1.1.0
    build:
      # Build context is the repository root
      context: ../../
      dockerfile: src/analytics/Dockerfile
    command: --port 8081 --host 0.0.0.0 --workers 1
    environment:
      EXAMPLE_PATH: './src/analytics'
      # APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-70b-instruct"}
      APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-405b-instruct"}
      APP_LLM_MODELENGINE: nvidia-ai-endpoints
      APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
      # Database name to store user conversation/summary (defaults to postgres).
      # NOTE(review): the earlier comment listed "inmemory, redis" as the
      # supported types, which does not match the postgres default - confirm
      # the supported values.
      APP_DATABASE_NAME: ${APP_DATABASE_NAME:-"postgres"}
      APP_DATABASE_URL: ${APP_DATABASE_URL:-"postgres:5432"}
      # Postgres config
      POSTGRES_USER: ${POSTGRES_USER:-postgres}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password}
      POSTGRES_DB: ${POSTGRES_DB:-postgres}
      # Postgres database name for customer data
      CUSTOMER_DATA_DB: ${CUSTOMER_DATA_DB:-customer_data}
      # Store summary/sentiment in database
      PERSIST_DATA: ${PERSIST_DATA:-true}
      NVIDIA_API_KEY: ${NVIDIA_API_KEY}
      # Log level for server, supported level NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL
      LOGLEVEL: ${LOGLEVEL:-INFO}
    ports:
      - "8082:8081"
    expose:
      - "8081"
    shm_size: 5gb
    depends_on:
      postgres:
        condition: service_healthy
# =======================
# Retriever Microservices
# =======================
  # Fetches relevant documents from the vector store.
  # Listens on 8081 inside the network, published on host port 8086.
  unstructured-retriever:
    container_name: unstructured-retriever
    image: nvcr.io/nvidia/blueprint/aiva-customer-service-unstructured-retriever:1.1.0
    build:
      # Build context is the repository root
      context: ../../
      dockerfile: src/retrievers/Dockerfile
      args:
        # Build arg, used to copy the relevant directory inside the container
        EXAMPLE_PATH: 'src/retrievers/unstructured_data'
    # Start the server on port 8081
    command: --port 8081 --host 0.0.0.0 --workers 1
    environment:
      # Path to the example directory.
      # NOTE(review): the earlier comment described it as relative to
      # GenerativeAIExamples/RAG/examples - confirm what it is relative to.
      EXAMPLE_PATH: 'src/retrievers/unstructured_data'
      # URL on which the vector store is hosted
      APP_VECTORSTORE_URL: "http://milvus:19530"
      # Type of vector DB used to store embeddings, supported type milvus, pgvector
      APP_VECTORSTORE_NAME: "milvus"
      # Name of the LLM model
      # APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-70b-instruct"}
      APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-405b-instruct"}
      # LLM model engine used for inference
      # NOTE(review): the earlier comment listed nvidia-ai-endpoints and
      # huggingface as supported types, but called this the embedding engine.
      APP_LLM_MODELENGINE: nvidia-ai-endpoints
      # URL on which the LLM model is hosted. If "", Nvidia hosted API is used
      APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
      APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5}
      # Embedding model engine used for inference, supported type nvidia-ai-endpoints
      APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints}
      # URL on which the embedding model is hosted. If "", Nvidia hosted API is used
      APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""}
      # Leave it blank to avoid using ranking
      APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"}
      # Ranking engine used for inference, supported type nvidia-ai-endpoints
      APP_RANKING_MODELENGINE: ${APP_RANKING_MODELENGINE:-nvidia-ai-endpoints}
      # URL on which the re-ranking model is hosted. If "", Nvidia hosted API is used
      APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL:-""}
      # Text splitter model name, it's fetched from huggingface
      APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l
      APP_TEXTSPLITTER_CHUNKSIZE: 506
      APP_TEXTSPLITTER_CHUNKOVERLAP: 200
      NVIDIA_API_KEY: ${NVIDIA_API_KEY}
      # Vector store collection name to store embeddings
      COLLECTION_NAME: ${COLLECTION_NAME:-unstructured_data}
      APP_RETRIEVER_TOPK: 4
      APP_RETRIEVER_SCORETHRESHOLD: 0.25
      # Number of documents to be retrieved from the retriever when the reranking model is enabled.
      # These are then sent to the re-ranker to get `APP_RETRIEVER_TOPK` documents
      VECTOR_DB_TOPK: 20
      # Log level for server, supported level NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL
      LOGLEVEL: ${LOGLEVEL:-INFO}
    ports:
      - "8086:8081"
    expose:
      - "8081"
    shm_size: 5gb
    depends_on:
      - milvus
# Fetch user information from database
  # Listens on 8081 inside the network, published on host port 8087.
  structured-retriever:
    container_name: structured-retriever
    image: nvcr.io/nvidia/blueprint/aiva-customer-service-structured-retriever:1.1.0
    build:
      # Build context is the repository root
      context: ../../
      dockerfile: src/retrievers/Dockerfile
      args:
        EXAMPLE_PATH: 'src/retrievers/structured_data'
    command: --port 8081 --host 0.0.0.0 --workers 1
    environment:
      EXAMPLE_PATH: 'src/retrievers/structured_data'
      # APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-meta/llama-3.1-70b-instruct}
      APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-meta/llama-3.1-405b-instruct}
      APP_LLM_MODELENGINE: nvidia-ai-endpoints
      APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
      APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5}
      APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints}
      APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""}
      # Prompt templates handed to the service
      APP_PROMPTS_CHATTEMPLATE: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature."
      APP_PROMPTS_RAGTEMPLATE: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user."
      NVIDIA_API_KEY: ${NVIDIA_API_KEY}
      COLLECTION_NAME: ${COLLECTION_NAME:-structured_data}
      APP_VECTORSTORE_URL: "http://milvus:19530"
      APP_VECTORSTORE_NAME: "milvus"
      # Database name to store user purchase history, only postgres is supported
      APP_DATABASE_NAME: ${APP_DATABASE_NAME:-"postgres"}
      APP_DATABASE_URL: ${APP_DATABASE_URL:-"postgres:5432"}
      # Defaults here are the read-only account and the customer_data database
      POSTGRES_USER: ${POSTGRES_USER:-postgres_readonly}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-readonly_password}
      POSTGRES_DB: ${POSTGRES_DB:-customer_data}
      CSV_NAME: PdM_machines
      LOGLEVEL: ${LOGLEVEL:-INFO}
    ports:
      - "8087:8081"
    expose:
      - "8081"
    shm_size: 5gb
    depends_on:
      # `required: false` entries are optional dependencies; milvus is mandatory
      postgres:
        condition: service_healthy
        required: false
      milvus:
        condition: service_healthy
      nemollm-embedding:
        condition: service_healthy
        required: false
# =======================
# Database Services - User purchase history and permanently store conversation details
# =======================
  # PostgreSQL server, published on host port 5432. Other services gate on
  # its healthcheck through `condition: service_healthy`.
  postgres:
    container_name: postgres_container
    image: postgres:17.1
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-postgres}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password}
      POSTGRES_DB: ${POSTGRES_DB:-customer_data}
    command:
      - "postgres"
      - "-c"
      - "shared_buffers=256MB"
      - "-c"
      - "max_connections=200"
    volumes:
      # Mount initialization scripts
      - ./init-scripts:/docker-entrypoint-initdb.d
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/postgres_data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    healthcheck:
      # Probe with the user/database this container was actually configured
      # with instead of a hard-coded "postgres", so overriding POSTGRES_USER
      # or POSTGRES_DB does not make every probe attempt a connection as a
      # role that does not exist. `$$` escapes Compose interpolation so the
      # variables are expanded by the shell inside the container. CMD-SHELL
      # already runs through the container's shell, so no extra `sh -c`
      # wrapper is needed.
      test:
        - CMD-SHELL
        - pg_isready -U "$${POSTGRES_USER}" -d "$${POSTGRES_DB}"
      interval: 10s
      timeout: 3s
      retries: 3
    restart: unless-stopped
# For visualization purpose
  # pgAdmin container; its port 80 is published on ${PGADMIN_PORT:-5050}.
  pgadmin:
    container_name: pgadmin_container
    image: dpage/pgadmin4:8.13.0
    # NOTE(review): UID and GID are interpolated from the environment that
    # runs compose - confirm they are actually set/exported there.
    user: '$UID:$GID'
    environment:
      PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-pgadmin4@pgadmin.org}
      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-admin}
      PGADMIN_CONFIG_SERVER_MODE: 'False'
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/pgadmin:/var/lib/pgadmin
    ports:
      - "${PGADMIN_PORT:-5050}:80"
    restart: unless-stopped
# =======================
# Cache Services - To store conversation of user to share among multiple workers
# =======================
  # Redis server, published on host port 6379, with /data kept on a host volume.
  redis:
    image: redis:7.0.13
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/redis-data:/data
    ports:
      - "6379:6379"
    restart: always
# Visualization tool for redis; container port 8081 is published on host port 9092.
  redis-commander:
    # NOTE(review): this is the only image in the file on a floating `latest`
    # tag - consider pinning a version.
    image: rediscommander/redis-commander:latest
    environment:
      REDIS_HOSTS: 'local:redis:6379'
    ports:
      - "9092:8081"
    restart: always
# =======================
# Vector Store Services
# =======================
  # etcd instance that milvus connects to (ETCD_ENDPOINTS: etcd:2379) and
  # waits on via its healthcheck.
  etcd:
    container_name: milvus-etcd
    image: quay.io/coreos/etcd:v3.5.17
    # Values are quoted so they reach the container as the same strings the
    # KEY=value list form would produce.
    environment:
      ETCD_AUTO_COMPACTION_MODE: 'revision'
      ETCD_AUTO_COMPACTION_RETENTION: '1000'
      ETCD_QUOTA_BACKEND_BYTES: '4294967296'
      ETCD_SNAPSHOT_COUNT: '50000'
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
    healthcheck:
      test:
        - CMD
        - etcdctl
        - endpoint
        - health
      interval: 30s
      timeout: 20s
      retries: 3
# MinIO instance that milvus connects to (MINIO_ADDRESS: minio:9010).
# The API is served on :9010 and the console on :9011 (see command).
  minio:
    container_name: milvus-minio
    image: minio/minio:RELEASE.2024-11-07T00-52-20Z
    environment:
      MINIO_ACCESS_KEY: minioadmin
      MINIO_SECRET_KEY: minioadmin
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
    command: minio server /minio_data --console-address ":9011" --address ":9010"
    ports:
      - "9011:9011"
      - "9010:9010"
    healthcheck:
      test:
        - CMD
        - curl
        - -f
        - http://localhost:9010/minio/health/live
      interval: 30s
      timeout: 20s
      retries: 3
# Milvus standalone (GPU image). Starts only after etcd and minio are healthy
# and reserves one NVIDIA GPU selected by VECTORSTORE_GPU_DEVICE_ID (default 0).
  milvus:
    container_name: milvus-standalone
    image: milvusdb/milvus:v2.4.15-gpu
    command:
      - milvus
      - run
      - standalone
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9010
      KNOWHERE_GPU_MEM_POOL_SIZE: 2048;4096
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
    ports:
      - "19530:19530"
      - "9091:9091"
    depends_on:
      etcd:
        condition: service_healthy
      minio:
        condition: service_healthy
    healthcheck:
      test:
        - CMD
        - curl
        - -f
        - http://localhost:9091/healthz
      interval: 30s
      start_period: 90s
      timeout: 20s
      retries: 3
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities:
                - gpu
              device_ids:
                - '${VECTORSTORE_GPU_DEVICE_ID:-0}'
# Give the project's default network the fixed name "nvidia-rag".
networks:
  default:
    name: nvidia-rag