Skip to content

Commit

Permalink
Update docker-compose.yaml (NVIDIA-AI-Blueprints#16)
Browse files Browse the repository at this point in the history
Use 405b model
  • Loading branch information
rkharwar-nv authored Jan 7, 2025
1 parent 849a802 commit f294d9e
Showing 1 changed file with 12 additions and 6 deletions.
18 changes: 12 additions & 6 deletions deploy/compose/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ services:
command: --port 8081 --host 0.0.0.0 --workers 1 --loop asyncio
environment:
EXAMPLE_PATH: './src/agent'
APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-70b-instruct"}
#APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-70b-instruct"}
APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-405b-instruct"}
APP_LLM_MODELENGINE: nvidia-ai-endpoints
APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
# Cache name to store user conversation
Expand All @@ -43,7 +44,8 @@ services:
STRUCTURED_RAG_URI: http://structured-retriever:8081
NVIDIA_API_KEY: ${NVIDIA_API_KEY}
GRAPH_RECURSION_LIMIT: 20
GRAPH_TIMEOUT_IN_SEC: 20
#GRAPH_TIMEOUT_IN_SEC: 20 # with meta/llama-3.1-70b-instruct
GRAPH_TIMEOUT_IN_SEC: 50 # with meta/llama-3.1-405b-instruct
RETURN_WINDOW_CURRENT_DATE: '2024-10-23' # Leave it empty to get the current date
RETURN_WINDOW_THRESHOLD_DAYS: 30
# Log level for the server; supported levels: NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL
Expand Down Expand Up @@ -76,7 +78,8 @@ services:
environment:
AGENT_SERVER_URL: ${AGENT_SERVER_URL:-http://agent-chain-server:8081}
ANALYTICS_SERVER_URL: ${ANALYTICS_SERVER_URL:-http://analytics-server:8081}
REQUEST_TIMEOUT: 320
#REQUEST_TIMEOUT: 320 # with meta/llama-3.1-70b-instruct
REQUEST_TIMEOUT: 800 # with meta/llama-3.1-405b-instruct
restart: unless-stopped # Optional: Automatically restart the container unless it is stopped
depends_on:
- agent-chain-server
Expand Down Expand Up @@ -105,7 +108,8 @@ services:
command: --port 8081 --host 0.0.0.0 --workers 1
environment:
EXAMPLE_PATH: './src/analytics'
APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-70b-instruct"}
#APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-70b-instruct"}
APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-405b-instruct"}
APP_LLM_MODELENGINE: nvidia-ai-endpoints
APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
# Database name to store user conversation/summary
Expand Down Expand Up @@ -157,7 +161,8 @@ services:
# Type of vector database used to store embeddings; supported types: milvus, pgvector
APP_VECTORSTORE_NAME: "milvus"
# Name of the LLM model used for inference (overridable via the APP_LLM_MODELNAME environment variable)
APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-70b-instruct"}
#APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-70b-instruct"}
APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama-3.1-405b-instruct"}
# LLM model engine used for inference; supported types: nvidia-ai-endpoints, huggingface
APP_LLM_MODELENGINE: nvidia-ai-endpoints
# url on which llm model is hosted. If "", Nvidia hosted API is used
Expand Down Expand Up @@ -207,7 +212,8 @@ services:
command: --port 8081 --host 0.0.0.0 --workers 1
environment:
EXAMPLE_PATH: 'src/retrievers/structured_data'
APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-meta/llama-3.1-70b-instruct}
#APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-meta/llama-3.1-70b-instruct}
APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-meta/llama-3.1-405b-instruct}
APP_LLM_MODELENGINE: nvidia-ai-endpoints
APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5}
Expand Down

0 comments on commit f294d9e

Please sign in to comment.