From a68cbdbac05726391891c74a8037f571cecd9fd0 Mon Sep 17 00:00:00 2001 From: "Jonathan C. McKinney" Date: Sat, 27 Jan 2024 10:36:53 -0800 Subject: [PATCH] Go to 12.1 instead of 12.2 so gpu system doesn't have to upgrade driver from 530.30.02 cuda12.1 --- Dockerfile | 4 ++-- docker_build_script_ubuntu.sh | 4 ++-- docs/README_InferenceServers.md | 12 +++--------- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index ec83b571b..0aeddf7dd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # devel needed for bitsandbytes requirement of libcudart.so, otherwise runtime sufficient -FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04 +FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04 ENV DEBIAN_FRONTEND=noninteractive @@ -7,7 +7,7 @@ ENV PATH="/h2ogpt_conda/bin:${PATH}" ARG PATH="/h2ogpt_conda/bin:${PATH}" ENV HOME=/workspace -ENV CUDA_HOME=/usr/local/cuda-12.2 +ENV CUDA_HOME=/usr/local/cuda-12.1 ENV VLLM_CACHE=/workspace/.vllm_cache ENV TIKTOKEN_CACHE_DIR=/workspace/tiktoken_cache diff --git a/docker_build_script_ubuntu.sh b/docker_build_script_ubuntu.sh index 913ce3dc8..5c34743c5 100755 --- a/docker_build_script_ubuntu.sh +++ b/docker_build_script_ubuntu.sh @@ -5,8 +5,8 @@ set -ex export DEBIAN_FRONTEND=noninteractive export PATH=/h2ogpt_conda/bin:$PATH export HOME=/workspace -export CUDA_HOME=/usr/local/cuda-12.2 -export PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu122 +export CUDA_HOME=/usr/local/cuda-12.1 +export PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu121 # Install linux dependencies apt-get update && apt-get install -y \ diff --git a/docs/README_InferenceServers.md b/docs/README_InferenceServers.md index 2ca61d9ed..1dadb612b 100644 --- a/docs/README_InferenceServers.md +++ b/docs/README_InferenceServers.md @@ -256,9 +256,9 @@ conda create -n vllm -y conda activate vllm conda install python=3.10 -y ``` -Assuming torch was installed with CUDA 12.3, and you have installed cuda locally in 
`/usr/local/cuda-12.3`:
+Assuming torch was installed with CUDA 12.1, and you have installed cuda locally in `/usr/local/cuda-12.1`:
 ```bash
-export CUDA_HOME=/usr/local/cuda-12.3
-export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu123"
+export CUDA_HOME=/usr/local/cuda-12.1
+export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu121"
 pip install mosaicml-turbo --upgrade # see docker_build_script_ubuntu.sh for x86 prebuilt wheel on s3
 pip install git+https://github.com/stanford-futuredata/megablocks.git # see docker_build_script_ubuntu.sh for x86 prebuilt wheel on s3
@@ -288,14 +288,8 @@
-export CUDA_VISIBLE_DEVICESs=0,1,2,3
+export CUDA_VISIBLE_DEVICES=0,1,2,3
 python -m vllm.entrypoints.openai.api_server --port=5000 --host=0.0.0.0 --model h2oai/h2ogpt-4096-llama2-70b-chat --tokenizer=hf-internal-testing/llama-tokenizer --tensor-parallel-size=4 --seed 1234 --max-num-batched-tokens=8192
 ```
-For Mixtral 8*7B run:
+For Mixtral 8x7B you need a newer CUDA 12 toolkit and vLLM build, then run:
 ```bash
-export CUDA_HOME=/usr/local/cuda-12.3
-export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu123"
-# so builds on cuda 12.3 given 12.1 is default build
-pip install git+https://github.com/vllm-project/vllm.git
-pip install mosaicml-turbo
-pip install git+https://github.com/stanford-futuredata/megablocks.git
 export CUDA_VISIBLE_DEVICES=0,1
 python -m vllm.entrypoints.openai.api_server --port=5002 --host=0.0.0.0 --model mistralai/Mixtral-8x7B-Instruct-v0.1 --seed 1234 --max-num-batched-tokens=65536 --tensor-parallel-size=2
 ```