-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathDockerfile
78 lines (67 loc) · 2.75 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#FROM nvcr.io/nvidia/pytorch:23.07-py3
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
LABEL authors="remichu"
WORKDIR /app
# Enable the CUDA backend when llama-cpp-python is compiled from source.
# NOTE: upstream llama.cpp renamed -DLLAMA_CUDA to -DGGML_CUDA; use the current
# flag so it matches GGML_CUDA below and the explicit CMAKE_ARGS on the
# llama-cpp-python install line later in this file.
ENV CMAKE_ARGS="-DGGML_CUDA=on"
# Set timezone to Asia/Singapore (you can change this to any Asian timezone)
ENV TZ=Asia/Singapore
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
# Build-related env vars for llama-cpp-python: compile CUDA kernels for all
# supported GPU architectures and enable the CUDA backend.
ENV CUDA_DOCKER_ARCH=all
ENV GGML_CUDA=1
# OS-level build toolchain, OpenCL/BLAS dev libraries and general utilities.
# update + install share one layer (avoids stale-cache bugs) and the apt lists
# are removed in that same layer so they never persist in the image.
# Package list is sorted alphabetically for easier diffing.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    clinfo \
    cmake \
    curl \
    gcc \
    git \
    libclblast-dev \
    libopenblas-dev \
    ninja-build \
    ocl-icd-opencl-dev \
    opencl-headers \
    software-properties-common \
    sudo \
    tzdata \
    wget \
    # Register the NVIDIA OpenCL ICD so OpenCL tooling can locate the GPU driver.
    && mkdir -p /etc/OpenCL/vendors \
    && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \
    && rm -rf /var/lib/apt/lists/*
# Add deadsnakes PPA and install Python 3.11 (Ubuntu 22.04 ships 3.10 by default)
RUN add-apt-repository ppa:deadsnakes/ppa \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python3.11 \
python3.11-dev \
python3.11-distutils \
&& rm -rf /var/lib/apt/lists/*
# Install pip for Python 3.11 via the official bootstrap script
# NOTE(review): get-pip.py is fetched without version pin or checksum — confirm
# whether a pinned/verified fetch is wanted for reproducibility.
RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
&& python3.11 get-pip.py \
&& rm get-pip.py
# Set Python 3.11 as the default python3
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
#ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
# Install Python build tooling and ML dependencies.
# --no-cache-dir keeps pip's download cache out of the image layers (hadolint DL3042).
RUN python3 -m pip install --no-cache-dir --upgrade pip cmake scikit-build
# Build llama-cpp-python from source with the CUDA backend enabled.
RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install --no-cache-dir llama-cpp-python
# COPY, not ADD, for plain local files (hadolint DL3020).
COPY requirements.txt /app
# torch pinned to the cu121 build to match the CUDA 12.1 base image.
RUN pip install --no-cache-dir torch==2.3.1 --index-url https://download.pytorch.org/whl/cu121
# ninja and packaging are build prerequisites for the --no-build-isolation
# flash-attn install below.
RUN pip install --no-cache-dir ninja packaging
RUN pip install --no-cache-dir flash-attn==2.6.3 --no-build-isolation
# Building exllamav2 from GitHub currently not working because it doesn't detect the CUDA device
#RUN git clone --branch dev_tp https://github.com/turboderp/exllamav2
#RUN cd exllamav2 && pip install -r requirements.txt && pip install .
RUN pip install --no-cache-dir -r requirements.txt
# Prebuilt exllamav2 wheel matching cu121 / torch 2.3.1 / CPython 3.11.
RUN pip install --no-cache-dir https://github.com/turboderp/exllamav2/releases/download/v0.1.8/exllamav2-0.1.8+cu121.torch2.3.1-cp311-cp311-linux_x86_64.whl
#RUN pip install git+https://github.com/huggingface/transformers
# transformers pinned to a specific commit for reproducibility.
RUN pip install --no-cache-dir git+https://github.com/huggingface/transformers@21fac7abba2a37fae86106f87fcf9974fd1e3830
#git clone https://github.com/PanQiWei/AutoGPTQ.git && cd AutoGPTQ
#INSTALL_KERNELS=1 pip install git+https://github.com/casper-hansen/AutoAWQ.git
#pip install -vvv --no-build-isolation -e .
RUN pip install --no-cache-dir gallama
# Clean up apt caches.
# NOTE(review): a RUN in a later layer cannot shrink earlier layers — the apt
# lists were already removed inside the install layers above, so this step is
# belt-and-braces only and does not reduce image size.
RUN apt-get clean && \
rm -rf /var/lib/apt/lists/*
# NOTE(review): no USER directive, so the container runs as root — confirm
# whether a non-root user is viable given the GPU/device access this image needs.
# Exec-form ENTRYPOINT: gallama runs as PID 1 and receives stop signals directly.
ENTRYPOINT ["gallama", "run"]