Commit

Try to fix flash attn in vllm build
pseudotensor committed Apr 8, 2024
1 parent 9b5e26e commit 3c7c77d
Showing 1 changed file with 5 additions and 10 deletions.
docker_build_script_ubuntu.sh — 15 changes: 5 additions & 10 deletions
@@ -86,17 +86,12 @@ python -m venv vllm_env --system-site-packages
# gputil is for rayWorker in vllm to run as non-root
# below required outside docker:
# apt-get install libnccl2
#/h2ogpt_conda/vllm_env/bin/python -m pip install https://h2o-release.s3.amazonaws.com/h2ogpt/vllm-0.2.7%2Bcu118-cp310-cp310-linux_x86_64.whl
#/h2ogpt_conda/vllm_env/bin/python -m pip install https://github.com/vllm-project/vllm/releases/download/v0.2.7/vllm-0.2.7+cu118-cp310-cp310-manylinux1_x86_64.whl
#/h2ogpt_conda/vllm_env/bin/python -m pip install vllm

/h2ogpt_conda/vllm_env/bin/python -m pip install ray pandas gputil==1.4.0 fschat==0.2.34 flash-attn==2.5.6 uvicorn[standard] hf_transfer==0.1.6 triton==2.2.0
#/h2ogpt_conda/vllm_env/bin/python -m pip install https://h2o-release.s3.amazonaws.com/h2ogpt/megablocks-0.5.1-cp310-cp310-linux_x86_64.whl
#/h2ogpt_conda/vllm_env/bin/python -m pip install https://h2o-release.s3.amazonaws.com/h2ogpt/triton-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
#/h2ogpt_conda/vllm_env/bin/python -m pip install https://h2o-release.s3.amazonaws.com/h2ogpt/mosaicml_turbo-0.0.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
# the wheel below was compiled on an A100 and doesn't seem to work on a V100, so go back to vllm's own build
#/h2ogpt_conda/vllm_env/bin/python -m pip install https://h2o-release.s3.amazonaws.com/h2ogpt/vllm-0.3.0-cp310-cp310-manylinux1_x86_64.whl
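# install vllm first and let pip resolve its dependencies, then re-pin the
# packages its resolver may have replaced (flash-attn in particular)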
/h2ogpt_conda/vllm_env/bin/python -m pip install vllm==0.4.0.post1
/h2ogpt_conda/vllm_env/bin/python -m pip uninstall flash-attn -y
/h2ogpt_conda/vllm_env/bin/python -m pip install gputil==1.4.0 flash-attn==2.5.6 hf_transfer==0.1.6

# pip install hf_transfer
# pip install tiktoken accelerate flash_attn
mkdir $VLLM_CACHE
chmod -R a+rwx /h2ogpt_conda
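
Not part of the commit, but a quick sanity check for this fix: import the re-pinned packages from the vllm virtualenv and print their versions (interpreter path and version pins taken from the script above).

/h2ogpt_conda/vllm_env/bin/python - <<'EOF'
import flash_attn
import vllm
# Expect 2.5.6 and 0.4.0.post1, per the pip pins in docker_build_script_ubuntu.sh.
print("flash-attn:", flash_attn.__version__)
print("vllm:", vllm.__version__)
EOF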
