
Remove exllama in favor of AutoGPTQ w/ exllamav2 support
pseudotensor committed Mar 20, 2024
1 parent 78391ae commit 26970ef
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions docs/linux_install.sh
@@ -146,7 +146,6 @@ sudo chmod +x /usr/bin/chromedriver
 #
 #* GPU Optional: For AutoGPTQ support on x86_64 linux
 #
-pip uninstall -y auto-gptq ; pip install auto-gptq==0.6.0 -c reqs_optional/reqs_constraints.txt
 # In-transformers support for AutoGPTQ; auto-gptq must also be installed, since it is used internally by transformers/optimum
 pip install optimum==1.16.1 -c reqs_optional/reqs_constraints.txt
 # See [AutoGPTQ](README_GPU.md#autogptq) about running AutoGPTQ models.
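
For context, an editor's sketch, not part of this commit: after the hunk above, one can sanity-check that optimum and auto-gptq are both importable in the script's Python environment, since transformers/optimum calls into auto-gptq internally. This assumes the environment used by the install script is active.

# Hedged post-install check; assumes the script's Python env is active.
python - <<'EOF'
import importlib.util
for mod in ("optimum", "auto_gptq"):
    print(mod, "found" if importlib.util.find_spec(mod) else "MISSING")
EOF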
@@ -172,20 +171,23 @@ pip uninstall llama_cpp_python_cuda -y
 # Check whether the environment variable PIP_EXTRA_INDEX_URL contains the substring "cu118"
 if [[ "${PIP_EXTRA_INDEX_URL}" == *"cu118"* ]]; then
 #* GPU Optional: For exllama support on x86_64 linux
-pip uninstall -y exllama ; pip install https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu118-cp310-cp310-linux_x86_64.whl --no-cache-dir -c reqs_optional/reqs_constraints.txt
+#pip uninstall -y exllama ; pip install https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu118-cp310-cp310-linux_x86_64.whl --no-cache-dir -c reqs_optional/reqs_constraints.txt
 # See [exllama](README_GPU.md#exllama) about running exllama models.
-echo "cuda118 for exllama"
+echo "cuda118"
 # https://github.com/casper-hansen/AutoAWQ_kernels
 pip install https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v0.0.3/autoawq_kernels-0.0.3+cu118-cp310-cp310-linux_x86_64.whl

+pip install auto-gptq==0.7.1 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
 echo "cuda118 for awq, see: https://github.com/casper-hansen/AutoAWQ_kernels/releases/"

 else
 #* GPU Optional: For exllama support on x86_64 linux
-pip uninstall -y exllama ; pip install https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl --no-cache-dir -c reqs_optional/reqs_constraints.txt
+#pip uninstall -y exllama ; pip install https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl --no-cache-dir -c reqs_optional/reqs_constraints.txt
 # See [exllama](README_GPU.md#exllama) about running exllama models.
-echo "cuda121 for exllama"
+echo "cuda121"
 pip install autoawq-kernels -c reqs_optional/reqs_constraints.txt
 echo "cuda121 for awq"

+pip install auto-gptq==0.7.1 exllamav2==0.0.16
 fi
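
An editor's note, not part of the commit: the branch above is selected purely by a substring match on PIP_EXTRA_INDEX_URL, and either path should leave an exllamav2-capable AutoGPTQ in place (on cu118 via the prebuilt auto-gptq wheel index, on cu121 via the separate exllamav2 package). A minimal sketch of how one might exercise the branch logic and verify the result; the index URL below is only an example value.

# Hedged sketch: demonstrate branch selection, then confirm the backends import.
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu121"  # example value only
if [[ "${PIP_EXTRA_INDEX_URL}" == *"cu118"* ]]; then
    echo "would take the cu118 path: prebuilt cu118 wheels for AutoAWQ kernels and auto-gptq"
else
    echo "would take the cu121 path: auto-gptq==0.7.1 plus exllamav2==0.0.16"
fi
python -c "import auto_gptq" && echo "auto-gptq importable"
# exllamav2 is pip-installed as its own package only on the cu121 path:
python -c "import exllamav2" && echo "exllamav2 importable (cu121 path)"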


