Skip to content

exllama(v2): fix exllamav1, add exllamav2 #1384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ARG TARGETARCH
ARG TARGETVARIANT

ENV BUILD_TYPE=${BUILD_TYPE}
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
ARG GO_TAGS="stablediffusion tts"

Expand Down Expand Up @@ -181,16 +181,13 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama2 \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/petals \
; fi

# we also copy exllama libs over to resolve exllama import error
# TODO: check if this is still needed
RUN if [ -d /usr/local/lib/python3.9/dist-packages/exllama ]; then \
cp -rfv /usr/local/lib/python3.9/dist-packages/exllama backend/python/exllama/;\
fi

# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ protogen-python:
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama2/ --grpc_python_out=backend/python/exllama2/ backend/backend.proto

## GRPC
# Note: it is duplicated in the Dockerfile
Expand All @@ -409,6 +410,7 @@ prepare-extra-conda-environments:
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/petals
$(MAKE) -C backend/python/exllama2


backend-assets/grpc:
Expand Down
1 change: 1 addition & 0 deletions backend/python/exllama/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ exllama:
@echo "Creating virtual environment..."
@conda env create --name exllama --file exllama.yml
@echo "Virtual environment created."
bash install.sh

.PHONY: run
run:
Expand Down
7 changes: 4 additions & 3 deletions backend/python/exllama/exllama.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
import torch
import torch.nn.functional as F
from torch import version as torch_version
from exllama.generator import ExLlamaGenerator
from exllama.model import ExLlama, ExLlamaCache, ExLlamaConfig
from exllama.tokenizer import ExLlamaTokenizer

from tokenizer import ExLlamaTokenizer
from generator import ExLlamaGenerator
from model import ExLlama, ExLlamaCache, ExLlamaConfig

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

Expand Down
3 changes: 2 additions & 1 deletion backend/python/exllama/exllama.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ dependencies:
- mpmath==1.3.0
- networkx==3.1
- ninja==1.11.1
- protobuf==4.24.4
- nvidia-cublas-cu12==12.1.3.1
- nvidia-cuda-cupti-cu12==12.1.105
- nvidia-cuda-nvrtc-cu12==12.1.105
Expand All @@ -45,11 +46,11 @@ dependencies:
- nvidia-nccl-cu12==2.18.1
- nvidia-nvjitlink-cu12==12.2.140
- nvidia-nvtx-cu12==12.1.105
- protobuf==4.24.4
- safetensors==0.3.2
- sentencepiece==0.1.99
- sympy==1.12
- torch==2.1.0
- triton==2.1.0
- typing-extensions==4.8.0
- numpy
prefix: /opt/conda/envs/exllama
15 changes: 15 additions & 0 deletions backend/python/exllama/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash

##
## A bash script that installs the required dependencies of exllama and prepares the environment
export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate exllama

echo "$CONDA_PREFIX"

# Without an active environment CONDA_PREFIX is empty, and the commands below
# would clone into /exllama and copy from the filesystem root. Stop early instead.
if [ -z "$CONDA_PREFIX" ]; then
    echo "install.sh: conda environment 'exllama' is not active (CONDA_PREFIX is empty)" >&2
    exit 1
fi

# Fetch the exllama sources into the environment prefix and install their
# Python requirements. Any failing step aborts the script with a non-zero
# status so the calling build fails instead of copying a partial checkout.
git clone https://github.com/turboderp/exllama "$CONDA_PREFIX/exllama" || exit 1
pushd "$CONDA_PREFIX/exllama" || exit 1
pip install -r requirements.txt || exit 1
popd || exit 1

# Copy the checked-out sources next to this script (the current directory).
cp -rfv "$CONDA_PREFIX"/exllama/* ./
12 changes: 12 additions & 0 deletions backend/python/exllama2/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Helper targets for the exllama2 Python backend.
# Both targets are commands, not files, so they are declared phony together.
.PHONY: exllama2 run

# Default goal: create the "exllama2" conda environment from exllama2.yml,
# then run install.sh from this directory.
exllama2:
	@echo "Creating virtual environment..."
	@conda env create --name exllama2 --file exllama2.yml
	@echo "Virtual environment created."
	bash install.sh

# Invoke run.sh from this directory; the closing message is printed only
# after run.sh exits successfully.
run:
	@echo "Running exllama2..."
	bash run.sh
	@echo "exllama2 run."
61 changes: 61 additions & 0 deletions backend/python/exllama2/backend_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading