Merge pull request theroyallab#20 from veryamazinglystupid/main

make colab better, fix libcudart errors
lonestriker · Dec 5, 2023 · 37f8f3e
2 parents 621e11b + ad1a12a
commit 37f8f3e
Showing 1 changed file with 50 additions and 45 deletions.
diff --git a/TabbyAPI_Colab_Example.ipynb b/TabbyAPI_Colab_Example.ipynb
@@ -16,6 +16,21 @@
     "accelerator": "GPU"
   },
   "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "# Keep-alive cell (precedes Cell 1 - Installation)\n",
+        "#@title Keep this widget playing to prevent Colab from disconnecting you { display-mode: \"form\" }\n",
+        "#@markdown Press play on the audio player that will appear below:\n",
+        "%%html\n",
+        "<audio src=\"https://oobabooga.github.io/silence.m4a\" controls>"
+      ],
+      "metadata": {
+        "id": "zU0omxLLm8AN"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
     {
       "cell_type": "code",
       "execution_count": null,
@@ -24,6 +39,14 @@
       },
       "outputs": [],
       "source": [
+        "# @title # **Cell 1 - Installation w/ Model Downloading** { display-mode: \"form\" }\n",
+        "# @markdown ---\n",
+        "# @markdown # Download Model\n",
+        "# Select model and branch\n",
+        "repo_id = \"royallab/airoboros-mistral2.2-7b-exl2\" # @param {type:\"string\"}\n",
+        "revision = \"6bpw\" # @param {type:\"string\"}\n",
+        "# @markdown ---\n",
+        "\n",
         "# Install tabbyAPI\n",
         "%cd /content/\n",
         "\n",
@@ -32,52 +55,44 @@
         "!pip install https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu118-cp310-cp310-linux_x86_64.whl -q\n",
         "!pip install -r requirements.txt -q\n",
         "!pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.6/flash_attn-2.3.6+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl -q\n",
-        "!pip install huggingface-hub -q"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
+        "!pip install huggingface-hub -q\n",
+        "\n",
         "# Download cloudflared tunnel\n",
         "%cd /content/tabbyAPI/\n",
         "\n",
         "!wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared\n",
-        "!chmod a+x cloudflared"
-      ],
-      "metadata": {
-        "id": "tpcfGFPzf1C0"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
+        "!chmod a+x cloudflared\n",
+        "\n",
         "# Download model repo\n",
         "%cd /content/tabbyAPI/\n",
         "\n",
-        "# Select model and branch\n",
-        "repo_id = \"royallab/airoboros-mistral2.2-7b-exl2\"\n",
-        "revision = \"6bpw\"\n",
-        "\n",
         "from huggingface_hub import snapshot_download\n",
         "snapshot_download(repo_id=repo_id, revision=revision, local_dir=f\"./models/{repo_id.replace('/', '_')}\")\n",
         "\n",
+        "!wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb\n",
+        "!dpkg -i cuda-keyring_1.0-1_all.deb\n",
+        "!apt-get -qq update\n",
+        "!apt-get -y -qq install cuda\n",
+        "\n",
         "print(f\"Model dir: './models/{repo_id.replace('/', '_')}'\")"
-      ],
-      "metadata": {
-        "id": "ZOZwXx0cc4l1"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
       "source": [
+        "# @title # **Cell 2 - Edit Config and Start Tabby** { display-mode: \"form\" }\n",
+        "# @markdown ---\n",
+        "# @markdown # Edit Config\n",
+        "\n",
+        "model = repo_id.replace('/', '_')\n",
+        "ContextSize = 4096 # @param {type:\"raw\"}\n",
+        "RopeScale = 1.0 # @param {type:\"raw\"}\n",
+        "RopeAlpha = 1.0 # @param {type:\"raw\"}\n",
+        "\n",
         "# Setup Config - edit parameters to fit your needs\n",
         "%cd /content/tabbyAPI/\n",
         "\n",
-        "write = '''\n",
+        "write = f'''\n",
         "# Sample YAML file for configuration.\n",
         "# Comment out values as needed. Every value has a default within the application.\n",
         "\n",
@@ -101,7 +116,7 @@
         "\n",
         "  # An initial model to load. Make sure the model is located in the model directory!\n",
         "  # A model can be loaded later via the API.\n",
-        "  model_name: royallab_airoboros-mistral2.2-7b-exl2\n",
+        "  model_name: {model}\n",
         "\n",
         "  # Sends dummy model names when the models endpoint is queried\n",
         "  # Enable this if the program is looking for a specific OAI model\n",
@@ -110,7 +125,7 @@
         "  # The below parameters apply only if model_name is set\n",
         "\n",
         "  # Maximum model context length (default: 4096)\n",
-        "  max_seq_len: 4096\n",
+        "  max_seq_len: {ContextSize}\n",
         "\n",
         "  # Automatically allocate resources to GPUs (default: True)\n",
         "  gpu_split_auto: True\n",
@@ -119,8 +134,8 @@
         "  # gpu_split: [20.6, 24]\n",
         "\n",
         "  # Rope scaling parameters (default: 1.0)\n",
-        "  rope_scale: 1.0\n",
-        "  rope_alpha: 1.0\n",
+        "  rope_scale: {RopeScale}\n",
+        "  rope_alpha: {RopeAlpha}\n",
         "\n",
         "  # Disable Flash-attention 2. Set to True for GPUs lower than Nvidia's 3000 series. (default: False)\n",
         "  no_flash_attention: False\n",
@@ -142,24 +157,14 @@
         "'''\n",
         "with open(\"./config.yml\", \"w\") as file:\n",
         "    file.write(write)\n",
-        "!cat config.yml"
-      ],
-      "metadata": {
-        "id": "aD7Rz8ZCeShD"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# Start API\n",
+        "!cat config.yml\n",
+        "\n",
         "%cd /content/tabbyAPI/\n",
         "\n",
         "!python main.py & ./cloudflared tunnel --url localhost:5000"
       ],
       "metadata": {
-        "id": "ZhSK71UeeMJi"
+        "id": "aD7Rz8ZCeShD"
       },
       "execution_count": null,
       "outputs": []