chore: cherry-pick #355 (#356)
sozercan authored Aug 3, 2024
1 parent 69284c6 commit da94e27
Showing 6 changed files with 59 additions and 57 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/patch-models.yaml
@@ -24,7 +24,7 @@ jobs:
          - ghcr.io/sozercan/llama3:70b
          - ghcr.io/sozercan/mixtral:8x7b
          - ghcr.io/sozercan/phi3:3.8b
-         - ghcr.io/sozercan/gemma1.1:2b
+         - ghcr.io/sozercan/gemma2:2b
          - ghcr.io/sozercan/codestral0.1:22b
    steps:
      - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
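The patch workflow's matrix now targets the `gemma2:2b` image. A quick way to confirm the tag actually exists in the registry before the patch job runs against it (this assumes the image has already been published to GHCR):

```bash
# Query GHCR for the new tag's manifest without pulling any layers
docker manifest inspect ghcr.io/sozercan/gemma2:2b

# Or pull and run it locally, the same way the README documents
docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b
```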
2 changes: 1 addition & 1 deletion .github/workflows/update-models.yaml
@@ -22,7 +22,7 @@ jobs:
      model:
        - llama-3.1-8b-instruct
        - phi-3-3.8b
-       - gemma-2b-instruct
+       - gemma-2-2b-instruct
    runs-on: ubuntu-latest
    timeout-minutes: 360
    steps:
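The update workflow builds one job per matrix entry. If the workflow also declares a `workflow_dispatch` trigger (an assumption; the trigger section is not shown in this diff), a rebuild covering the new entry could be kicked off manually with the GitHub CLI:

```bash
# Hypothetical manual trigger; requires a workflow_dispatch event on the workflow
gh workflow run update-models.yaml
# Follow the matrix jobs as they run
gh run watch
```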
36 changes: 16 additions & 20 deletions README.md
@@ -81,33 +81,29 @@ If it doesn't include a specific model, you can always [create your own images](

## CPU

-| Model            | Optimization | Parameters | Command                                                          | Model Name              | License |
-| ---------------- | ------------ | ---------- | ---------------------------------------------------------------- | ----------------------- | ------- |
-| 🦙 Llama 3        | Instruct     | 8B         | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:8b`     | `llama-3-8b-instruct`   | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 3        | Instruct     | 70B        | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:70b`    | `llama-3-70b-instruct`  | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2        | Chat         | 7B         | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:7b`     | `llama-2-7b-chat`       | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2        | Chat         | 13B        | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:13b`    | `llama-2-13b-chat`      | [Llama](https://ai.meta.com/llama/license/) |
-| Ⓜ️ Mixtral        | Instruct     | 8x7B       | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b`  | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
-| 🅿️ Phi 3          | Instruct     | 3.8B       | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b`     | `phi-3-3.8b`            | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
-| 🔡 Gemma 1.1      | Instruct     | 2B         | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b`      | `gemma-2b-instruct`     | [Gemma](https://ai.google.dev/gemma/terms) |
-| ⌨️ Codestral 0.1  | Code         | 22B        | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b`         | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
+| Model            | Optimization | Parameters | Command                                                            | Model Name               | License |
+| ---------------- | ------------ | ---------- | ------------------------------------------------------------------ | ------------------------ | ------- |
+| 🦙 Llama 3.1      | Instruct     | 8B         | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:8b`     | `llama-3.1-8b-instruct`  | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 3.1      | Instruct     | 70B        | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b`    | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| Ⓜ️ Mixtral        | Instruct     | 8x7B       | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b`    | `mixtral-8x7b-instruct`  | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
+| 🅿️ Phi 3          | Instruct     | 3.8B       | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b`       | `phi-3-3.8b`             | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🔡 Gemma 2        | Instruct     | 2B         | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b`       | `gemma-2-2b-instruct`    | [Gemma](https://ai.google.dev/gemma/terms) |
+| ⌨️ Codestral 0.1  | Code         | 22B        | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b`   | `codestral-22b`          | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
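Once one of these containers is up, the value in the Model Name column is what goes in the `model` field of an OpenAI-compatible request. A minimal smoke test against the new Gemma 2 image (the prompt here is just an example):

```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "gemma-2-2b-instruct",
        "messages": [{"role": "user", "content": "Explain Kubernetes in one sentence."}]
      }'
```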

### NVIDIA CUDA

> [!NOTE]
> To enable GPU acceleration, please see [GPU Acceleration](https://sozercan.github.io/aikit/docs/gpu).
> Please note that the only difference between the CPU and GPU sections is the `--gpus all` flag in the command, which enables GPU acceleration.

-| Model            | Optimization | Parameters | Command                                                                     | Model Name              | License |
-| ---------------- | ------------ | ---------- | --------------------------------------------------------------------------- | ----------------------- | ------- |
-| 🦙 Llama 3        | Instruct     | 8B         | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:8b`     | `llama-3-8b-instruct`   | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 3        | Instruct     | 70B        | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:70b`    | `llama-3-70b-instruct`  | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2        | Chat         | 7B         | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:7b`     | `llama-2-7b-chat`       | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2        | Chat         | 13B        | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:13b`    | `llama-2-13b-chat`      | [Llama](https://ai.meta.com/llama/license/) |
-| Ⓜ️ Mixtral        | Instruct     | 8x7B       | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b`  | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
-| 🅿️ Phi 3          | Instruct     | 3.8B       | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b`     | `phi-3-3.8b`            | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
-| 🔡 Gemma 1.1      | Instruct     | 2B         | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b`      | `gemma-2b-instruct`     | [Gemma](https://ai.google.dev/gemma/terms) |
-| ⌨️ Codestral 0.1  | Code         | 22B        | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b`         | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
+| Model            | Optimization | Parameters | Command                                                                       | Model Name               | License |
+| ---------------- | ------------ | ---------- | ----------------------------------------------------------------------------- | ------------------------ | ------- |
+| 🦙 Llama 3.1      | Instruct     | 8B         | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:8b`     | `llama-3.1-8b-instruct`  | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 3.1      | Instruct     | 70B        | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b`    | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| Ⓜ️ Mixtral        | Instruct     | 8x7B       | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b`    | `mixtral-8x7b-instruct`  | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
+| 🅿️ Phi 3          | Instruct     | 3.8B       | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b`       | `phi-3-3.8b`             | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
+| 🔡 Gemma 2        | Instruct     | 2B         | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b`       | `gemma-2-2b-instruct`    | [Gemma](https://ai.google.dev/gemma/terms) |
+| ⌨️ Codestral 0.1  | Code         | 22B        | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b`   | `codestral-22b`          | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
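Before starting any of the GPU images, it is worth confirming that Docker can see the GPU at all. A common sanity check (any CUDA base image works here; the tag below is just an example):

```bash
# If this prints a GPU table, the --gpus all flag above will work;
# if it errors, the NVIDIA Container Toolkit is not set up correctly.
docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
```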

## What's next?

@@ -3,9 +3,9 @@ apiVersion: v1alpha1
debug: true
runtime: cuda
models:
-  - name: gemma-2b-instruct
-    source: https://huggingface.co/lmstudio-community/gemma-1.1-2b-it-GGUF/resolve/main/gemma-1.1-2b-it-Q4_K_M.gguf
-    sha256: cc2118e1d780fa33582738d8c99223d62c8734b06ef65076c01618d484d081d4
+  - name: gemma-2-2b-instruct
+    source: https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf
+    sha256: e0aee85060f168f0f2d8473d7ea41ce2f3230c1bc1374847505ea599288a7787
promptTemplates:
- name: chatMsg
template: |
@@ -21,10 +21,10 @@ models:
template: |
{{ .Input }}
config: |
-  - name: gemma-2b-instruct
+  - name: gemma-2-2b-instruct
backend: llama
parameters:
-      model: gemma-1.1-2b-it-Q4_K_M.gguf
+      model: gemma-2-2b-it-Q4_K_M.gguf
context_size: 8192
template:
chat_message: chatMsg
@@ -35,6 +35,5 @@ config: |
- \"<start_of_turn>\"
- \"<end_of_turn>\"
- \"<|im_end|>\"
-    gpu_layers: 35
f16: true
mmap: true
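The `sha256` field pins the exact GGUF artifact. The digest can be reproduced locally, with the URL and checksum taken straight from the config above:

```bash
# Download the pinned GGUF and verify it against the manifest's digest
curl -LO https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf
echo "e0aee85060f168f0f2d8473d7ea41ce2f3230c1bc1374847505ea599288a7787  gemma-2-2b-it-Q4_K_M.gguf" | sha256sum -c -
```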
2 changes: 1 addition & 1 deletion scripts/parse-models.sh
@@ -17,7 +17,7 @@ extract_model_type() {
}

# Run and display results for each example
-for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "gemma-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct"; do
+for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct"; do
echo "Model: $MODEL"
echo " Name: $(extract_model_name $MODEL)"
echo " Size: $(extract_model_size $MODEL)"