{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[Open in Kaggle](https://kaggle.com/kernels/welcome?src=https://github.com/Isotr0py/SakuraLLM-Notebooks/blob/main/Sakura-13B-Galgame-Kaggle-llama.cpp.ipynb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-27T12:39:24.065177Z",
     "iopub.status.busy": "2023-12-27T12:39:24.064891Z",
     "iopub.status.idle": "2023-12-27T12:44:04.909267Z",
     "shell.execute_reply": "2023-12-27T12:44:04.907937Z",
     "shell.execute_reply.started": "2023-12-27T12:39:24.065151Z"
    },
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Fetch the server code and work from inside the checkout.\n",
    "!git clone https://github.com/SakuraLLM/Sakura-13B-Galgame.git\n",
    "%cd Sakura-13B-Galgame\n",
    "\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install \"diskcache>=5.6.1\"\n",
    "%pip install llama-cpp-python -i https://sakurallm.github.io/llama-cpp-python/whl/cu121\n",
    "%pip install -q -r requirements.llamacpp.txt\n",
    "%pip install -q pyngrok\n",
    "\n",
    "# install localtunnel\n",
    "!npm install -g localtunnel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-27T12:44:04.911814Z",
     "iopub.status.busy": "2023-12-27T12:44:04.911507Z"
    },
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Tunnel selection, first match wins:\n",
    "#   1. ngrokToken is non-empty -> ngrok\n",
    "#   2. use_pinggy is True      -> pinggy (reverse tunnel over ssh)\n",
    "#   3. otherwise               -> localtunnel\n",
    "ngrokToken = \"\"\n",
    "use_pinggy = True\n",
    "MODEL = \"sakura-14b-qwen2beta-v0.9-Q6_K\"\n",
    "# Local port every tunnel forwards to. Presumably the port server.py\n",
    "# listens on -- verify before changing.\n",
    "PORT = 5000\n",
    "\n",
    "\n",
    "import subprocess\n",
    "import threading\n",
    "from pathlib import Path\n",
    "\n",
    "from huggingface_hub import hf_hub_download\n",
    "\n",
    "\n",
    "def start_tunnel(cmd):\n",
    "    \"\"\"Run the tunnel client `cmd` (an argv list) and echo its stdout here.\n",
    "\n",
    "    Blocks until the client closes stdout, so call it from a background thread.\n",
    "    \"\"\"\n",
    "    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)\n",
    "    for line in proc.stdout:\n",
    "        # errors=\"replace\": an undecodable byte must not kill the reader thread.\n",
    "        print(line.decode(errors=\"replace\"), end='')\n",
    "\n",
    "\n",
    "if ngrokToken:\n",
    "    from pyngrok import conf, ngrok\n",
    "    conf.get_default().auth_token = ngrokToken\n",
    "    conf.get_default().monitor_thread = False\n",
    "    ssh_tunnels = ngrok.get_tunnels(conf.get_default())\n",
    "    # Reuse a tunnel that is already open instead of opening a second one.\n",
    "    if len(ssh_tunnels) == 0:\n",
    "        ssh_tunnel = ngrok.connect(PORT)\n",
    "        print('address:'+ssh_tunnel.public_url)\n",
    "    else:\n",
    "        print('address:'+ssh_tunnels[0].public_url)\n",
    "else:\n",
    "    if use_pinggy:\n",
    "        tunnel_cmd = [\n",
    "            \"ssh\",\n",
    "            \"-o\", \"UserKnownHostsFile=/dev/null\",\n",
    "            \"-o\", \"StrictHostKeyChecking=no\",\n",
    "            \"-p\", \"443\",\n",
    "            f\"-R0:localhost:{PORT}\",\n",
    "            \"a.pinggy.io\",\n",
    "        ]\n",
    "    else:\n",
    "        tunnel_cmd = [\"lt\", \"--port\", f\"{PORT}\"]\n",
    "    # Run the tunnel in the background so this cell can go on to start the server.\n",
    "    threading.Thread(target=start_tunnel, daemon=True, args=(tunnel_cmd,)).start()\n",
    "\n",
    "# Download the model weights only when they are not already on disk.\n",
    "MODEL_PATH = f\"./models/{MODEL}.gguf\"\n",
    "if not Path(MODEL_PATH).exists():\n",
    "    hf_hub_download(repo_id=\"SakuraLLM/Sakura-14B-Qwen2beta-v0.9-GGUF\", filename=f\"{MODEL}.gguf\", local_dir=\"models/\")\n",
    "\n",
    "!python server.py \\\n",
    "    --model_name_or_path $MODEL_PATH \\\n",
    "    --llama_cpp \\\n",
    "    --use_gpu \\\n",
    "    --model_version 0.9 \\\n",
    "    --trust_remote_code \\\n",
    "    --no-auth"
   ]
  }
 ],
 "metadata": {
  "kaggle": {
   "accelerator": "nvidiaTeslaT4",
   "dataSources": [
    {
     "datasetId": 4208491,
     "sourceId": 7261583,
     "sourceType": "datasetVersion"
    }
   ],
   "dockerImageVersionId": 30627,
   "isGpuEnabled": true,
   "isInternetEnabled": true,
   "language": "python",
   "sourceType": "notebook"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
0 commit comments