Skip to content

Commit

Permalink
a convenient script for spinning up the API with Model Workers (#2790)
Browse files Browse the repository at this point in the history
  • Loading branch information
ckgresla authored Dec 9, 2023
1 parent decceed commit c842764
Showing 1 changed file with 60 additions and 0 deletions.
60 changes: 60 additions & 0 deletions build-api.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash
# A rather convenient script for spinning up models behind screens


# Variables
PROJECT_DIR="$(pwd)"
CONDA_ENV_NAME="fastchat" #

MODEL_PATH="HuggingFaceH4/zephyr-7b-beta" #beta is better than the alpha version, base model w/o quantization
MODEL_PATH="lmsys/vicuna-7b-v1.5"

API_HOST="0.0.0.0"
API_PORT_NUMBER=8000


# init the screens
check_and_create_screen() {
local SCREENNAME="$1"
if screen -list | grep -q "$SCREENNAME"; then
echo "Screen session '$SCREENNAME' exists. Doing nothing."
else
echo "Screen session '$SCREENNAME' not found. Creating..."
screen -d -m -S "$SCREENNAME"
echo "created!"
fi
}

# convenience function for sending commands to named screens
send_cmd() {
local SCREENNAME="$1"
local CMD="$2"
screen -DRRS $SCREENNAME -X stuff '$2 \r'
}

# hardcoded names, for baby api
SCREENNAMES=(
"controller"
"api"
# Worker screens include the devices they are bound to, if 'd0' is only worker it has full GPU access
"worker-d0"
"worker-d1"
)

for screen in "${SCREENNAMES[@]}"; do
check_and_create_screen "$screen"
sleep 0.1
# also activate the conda compute environment for these
screen -DRRS "$screen" -X stuff "conda deactivate \r"
screen -DRRS "$screen" -X stuff "conda activate $CONDA_ENV_NAME \r"

done


# Send Commmands on a per Screen Basis
screen -DRRS controller -X stuff "python3 -m fastchat.serve.controller \r"

screen -DRRS worker-d0 -X stuff "CUDA_VISIBLE_DEVICES=0 python3 -m fastchat.serve.model_worker --model-path $MODEL_PATH --conv-template one_shot --limit-worker-concurrency 1 \r"
screen -DRRS worker-d1 -X stuff "CUDA_VISIBLE_DEVICES=1 python3 -m fastchat.serve.model_worker --model-path $MODEL_PATH --port 21003 --worker-address http://localhost:21003 --conv-template one_shot --limit-worker-concurrency 1 \r"

screen -DRRS api -X stuff "python3 -m fastchat.serve.openai_api_server --host $API_HOST --port $API_PORT_NUMBER \r"

0 comments on commit c842764

Please sign in to comment.