Skip to content

Commit

Permalink
🚀 Dockerize llamacpp (ggerganov#132)
Browse files Browse the repository at this point in the history
* feat: dockerize llamacpp

* feat: split build & runtime stages

* split dockerfile into main & tools

* add quantize into tool docker image

* Update .devops/tools.sh

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* add docker action pipeline

* change CI to publish at github docker registry

* fix name runs-on macOS-latest is macos-latest (lowercase)

* include docker versioned images

* fix github action docker

* fix docker.yml

* feat: include all-in-one command tool & update readme.md

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
  • Loading branch information
bernatvadell and ggerganov authored Mar 17, 2023
1 parent 904d2a8 commit 2af23d3
Show file tree
Hide file tree
Showing 9 changed files with 270 additions and 2 deletions.
17 changes: 17 additions & 0 deletions .devops/full.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
apt-get install -y build-essential python3 python3-pip

RUN pip install --upgrade pip setuptools wheel \
&& pip install torch torchvision torchaudio sentencepiece numpy

WORKDIR /app

COPY . .

RUN make

ENTRYPOINT ["/app/.devops/tools.sh"]
18 changes: 18 additions & 0 deletions .devops/main.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
apt-get install -y build-essential

WORKDIR /app

COPY . .

RUN make

FROM ubuntu:$UBUNTU_VERSION as runtime

COPY --from=build /app/main /main

ENTRYPOINT [ "/main" ]
46 changes: 46 additions & 0 deletions .devops/tools.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
set -e

# Read the first argument into a variable
arg1="$1"

# Shift the arguments to remove the first one
shift

# Join the remaining arguments into a single string
arg2="$@"

if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then
python3 ./convert-pth-to-ggml.py $arg2
elif [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then
./quantize $arg2
elif [[ $arg1 == '--run' || $arg1 == '-r' ]]; then
./main $arg2
elif [[ $arg1 == '--download' || $arg1 == '-d' ]]; then
python3 ./download-pth.py $arg2
elif [[ $arg1 == '--all-in-one' || $arg1 == '-a' ]]; then
echo "Downloading model..."
python3 ./download-pth.py "$1" "$2"
echo "Converting PTH to GGML..."
for i in `ls $1/$2/ggml-model-f16.bin*`; do
if [ -f "${i/f16/q4_0}" ]; then
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
else
echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
./quantize "$i" "${i/f16/q4_0}" 2
fi
done
else
echo "Unknown command: $arg1"
echo "Available commands: "
echo " --run (-r): Run a model previously converted into ggml"
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -t 8 -n 512"
echo " --convert (-c): Convert a llama model into ggml"
echo " ex: \"/models/7B/\" 1"
echo " --quantize (-q): Optimize with quantization process ggml"
echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
echo " --download (-d): Download original llama model from CDN: https://agi.gpt4.org/llama/"
echo " ex: \"/models/\" 7B"
echo " --all-in-one (-a): Execute --download, --convert & --quantize"
echo " ex: \"/models/\" 7B"
fi
24 changes: 24 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
*.o
*.a
.cache/
.vs/
.vscode/
.DS_Store

build/
build-em/
build-debug/
build-release/
build-static/
build-no-accel/
build-sanitize-addr/
build-sanitize-thread/

models/*

/main
/quantize

arm_neon.h
compile_commands.json
Dockerfile
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
make
macOS-latest:
runs-on: macOS-latest
runs-on: macos-latest

steps:
- name: Clone
Expand Down
61 changes: 61 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

# GitHub recommends pinning actions to a commit SHA.
# To get a newer version, you will need to update the SHA.
# You can also reference a tag or branch, but the action may change without warning.

name: Publish Docker image

on:
pull_request:
push:
branches:
- master

jobs:
push_to_registry:
name: Push Docker image to Docker Hub
runs-on: ubuntu-latest
env:
COMMIT_SHA: ${{ github.sha }}
strategy:
matrix:
config:
- { tag: "light", dockerfile: ".devops/main.Dockerfile" }
- { tag: "full", dockerfile: ".devops/full.Dockerfile" }
steps:
- name: Check out the repo
uses: actions/checkout@v3

- name: Set up QEMU
uses: docker/setup-qemu-action@v2

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

- name: Log in to Docker Hub
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Build and push Docker image (versioned)
if: github.event_name == 'push'
uses: docker/build-push-action@v4
with:
context: .
push: true
tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
file: ${{ matrix.config.dockerfile }}

- name: Build and push Docker image (tagged)
uses: docker/build-push-action@v4
with:
context: .
push: ${{ github.event_name == 'push' }}
tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}"
file: ${{ matrix.config.dockerfile }}
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Supported platforms:
- [X] Mac OS
- [X] Linux
- [X] Windows (via CMake)
- [X] Docker

---

Expand Down Expand Up @@ -194,6 +195,37 @@ Finally, copy the `llama` binary and the model files to your device storage. Her

https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4

### Docker

#### Prerequisites
* Docker must be installed and running on your system.
* Create a folder to store big models & intermediate files (in ex. im using /llama/models)

#### Images
We have two Docker images available for this project:

1. `ghcr.io/ggerganov/llama.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization.
2. `ghcr.io/ggerganov/llama.cpp:light`: This image only includes the main executable file.

#### Usage

The easiest way to download the models, convert them to ggml and optimize them is with the --all-in-one command which includes the full docker image.

```bash
docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-one "/models/" 7B
```

On complete, you are ready to play!

```bash
docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512
```

or with light image:

```bash
docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512
```

## Limitations

Expand Down
6 changes: 5 additions & 1 deletion convert-pth-to-ggml.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# At the start of the ggml file we write the model parameters
# and vocabulary.
#

import os
import sys
import json
import struct
Expand Down Expand Up @@ -64,6 +64,10 @@ def get_n_parts(dim):
sys.exit(1)
fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"

if os.path.exists(fname_out):
print(f"Skip conversion, it already exists: {fname_out}")
sys.exit(0)

with open(fname_hparams, "r") as f:
hparams = json.load(f)

Expand Down
66 changes: 66 additions & 0 deletions download-pth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import sys
from tqdm import tqdm
import requests

if len(sys.argv) < 3:
print("Usage: download-pth.py dir-model model-type\n")
print(" model-type: Available models 7B, 13B, 30B or 65B")
sys.exit(1)

modelsDir = sys.argv[1]
model = sys.argv[2]

num = {
"7B": 1,
"13B": 2,
"30B": 4,
"65B": 8,
}

if model not in num:
print(f"Error: model {model} is not valid, provide 7B, 13B, 30B or 65B")
sys.exit(1)

print(f"Downloading model {model}")

files = ["checklist.chk", "params.json"]

for i in range(num[model]):
files.append(f"consolidated.0{i}.pth")

resolved_path = os.path.abspath(os.path.join(modelsDir, model))
os.makedirs(resolved_path, exist_ok=True)

for file in files:
dest_path = os.path.join(resolved_path, file)

if os.path.exists(dest_path):
print(f"Skip file download, it already exists: {file}")
continue

url = f"https://agi.gpt4.org/llama/LLaMA/{model}/{file}"
response = requests.get(url, stream=True)
with open(dest_path, 'wb') as f:
with tqdm(unit='B', unit_scale=True, miniters=1, desc=file) as t:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
t.update(len(chunk))

files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
for file in files2:
dest_path = os.path.join(modelsDir, file)

if os.path.exists(dest_path):
print(f"Skip file download, it already exists: {file}")
continue

url = f"https://agi.gpt4.org/llama/LLaMA/{file}"
response = requests.get(url, stream=True)
with open(dest_path, 'wb') as f:
with tqdm(unit='B', unit_scale=True, miniters=1, desc=file) as t:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
t.update(len(chunk))

0 comments on commit 2af23d3

Please sign in to comment.