From 4ee9b77deb6527efeaf0d4cb34749114632779bd Mon Sep 17 00:00:00 2001 From: shibing624 Date: Wed, 24 Apr 2024 20:54:12 +0800 Subject: [PATCH] update llama3 template. --- README.md | 38 ++++++++++++++++++++------------------ README_EN.md | 13 +++++++------ supervised_finetuning.py | 15 +++++++++++++++ 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 888efb2..be9e47e 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Supervised Finetuning, RLHF(Reward Modeling and Reinforcement Learning) and DPO( - DPO方法来自论文[Direct Preference Optimization:Your Language Model is Secretly a Reward Model](https://arxiv.org/pdf/2305.18290.pdf) - ORPO方法来自论文[ORPO: Monolithic Preference Optimization without Reference Model](https://arxiv.org/abs/2403.07691) ## 🔥 News +[2024/04/24] v2.0版本:支持了 **Meta Llama 3** 系列模型。 [2024/04/17] v1.9版本:支持了 **[ORPO](https://arxiv.org/abs/2403.07691)**,详细用法请参照 `run_orpo.sh`。详见[Release-v1.9](https://github.com/shibing624/MedicalGPT/releases/tag/1.9.0) @@ -146,25 +147,26 @@ Training Stage: #### Supported Models -| Model Name | Model Size | Template | -|----------------------------------------------------------------------|-----------------------------|-----------| -| [BLOOMZ](https://huggingface.co/bigscience/bloomz) | 560M/1.1B/1.7B/3B/7.1B/176B | vicuna | -| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | alpaca | -| [LLaMA2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | -| [Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) | 7B/8x7B | mistral | -| [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B | baichuan | +| Model Name | Model Size | Template | +|----------------------------------------------------------------------|-----------------------------|----------| +| [BLOOMZ](https://huggingface.co/bigscience/bloomz) | 560M/1.1B/1.7B/3B/7.1B/176B | vicuna | +| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | alpaca | +| 
[LLaMA2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | +| [LLaMA3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | +| [Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) | 7B/8x7B | mistral | +| [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B | baichuan | | [Baichuan2](https://github.com/baichuan-inc/Baichuan2) | 7B/13B | baichuan2 | -| [InternLM](https://github.com/InternLM/InternLM) | 7B | intern | -| [Qwen](https://github.com/QwenLM/Qwen) | 1.8B/7B/14B/72B | chatml | -| [Qwen1.5](https://github.com/QwenLM/Qwen1.5) | 0.5B/1.8B/4B/14B/72B | qwen | -| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | xverse | -| [ChatGLM](https://github.com/THUDM/ChatGLM-6B) | 6B | chatglm | -| [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) | 6B | chatglm2 | -| [ChatGLM3](https://github.com/THUDM/ChatGLM3) | 6B | chatglm3 | -| [Yi](https://github.com/01-ai/Yi) | 6B/34B | yi | -| [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM) | 7B/16B/67B | deepseek | -| [Orion](https://github.com/OrionStarAI/Orion) | 14B | orion | -| [Cohere](https://huggingface.co/CohereForAI/c4ai-command-r-plus) | 104B | cohere | +| [InternLM](https://github.com/InternLM/InternLM) | 7B | intern | +| [Qwen](https://github.com/QwenLM/Qwen) | 1.8B/7B/14B/72B | chatml | +| [Qwen1.5](https://github.com/QwenLM/Qwen1.5) | 0.5B/1.8B/4B/14B/72B | qwen | +| [XVERSE](https://github.com/xverse-ai/XVERSE-13B) | 13B | xverse | +| [ChatGLM](https://github.com/THUDM/ChatGLM-6B) | 6B | chatglm | +| [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) | 6B | chatglm2 | +| [ChatGLM3](https://github.com/THUDM/ChatGLM3) | 6B | chatglm3 | +| [Yi](https://github.com/01-ai/Yi) | 6B/34B | yi | +| [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM) | 7B/16B/67B | deepseek | +| [Orion](https://github.com/OrionStarAI/Orion) | 14B | orion | +| [Cohere](https://huggingface.co/CohereForAI/c4ai-command-r-plus) | 104B | cohere | ## 💻 Inference 
训练完成后,现在我们加载训练好的模型,验证模型生成文本的效果。 diff --git a/README_EN.md b/README_EN.md index c24e728..75c7869 100644 --- a/README_EN.md +++ b/README_EN.md @@ -124,6 +124,7 @@ sh run_ppo.sh | [BLOOMZ](https://huggingface.co/bigscience/bloomz) | 560M/1.1B/1.7B/3B/7.1B/176B | vicuna | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | alpaca | | [LLaMA2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | +| [LLaMA3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | | [Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) | 7B/8x7B | mistral | | [Baichuan](https://github.com/baichuan-inc/baichuan-13B) | 7B/13B | baichuan | | [Baichuan2](https://github.com/baichuan-inc/Baichuan2) | 7B/13B | baichuan2 | @@ -151,12 +152,12 @@ pip install -r requirements.txt --upgrade ### Hardware Requirement (VRAM) -| Method | Bits | 7B | 13B | 30B | 65B | 8x7B | -| ------ | ---- | ----- | ----- | ----- | ------ | ------ | -| Full | 16 | 160GB | 320GB | 600GB | 1200GB | 900GB | -| LoRA | 16 | 16GB | 32GB | 80GB | 160GB | 120GB | -| QLoRA | 8 | 10GB | 16GB | 40GB | 80GB | 80GB | -| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB | 32GB | +| Train Method | Bits | 7B | 13B | 30B | 65B | 8x7B | +|--------------| ---- | ----- | ----- | ----- | ------ | ------ | +| Full | 16 | 160GB | 320GB | 600GB | 1200GB | 900GB | +| LoRA | 16 | 16GB | 32GB | 80GB | 160GB | 120GB | +| QLoRA | 8 | 10GB | 16GB | 40GB | 80GB | 80GB | +| QLoRA | 4 | 6GB | 12GB | 24GB | 48GB | 32GB | ## 🔥 Inference After the training is complete, now we load the trained model to verify the effect of the model generating text. 
diff --git a/supervised_finetuning.py b/supervised_finetuning.py index 3d98cf2..30c61db 100644 --- a/supervised_finetuning.py +++ b/supervised_finetuning.py @@ -552,6 +552,21 @@ def register_conv_template(template: Conversation): ) ) +"""llama3 template +source: https://github.com/meta-llama/llama3 +Supports: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct +""" +register_conv_template( + Conversation( + name="llama3", + system_prompt="", + messages=[], + roles=("user", "assistant"), + prompt="<|start_header_id|>user<|end_header_id|>\n\n{query}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + sep="<|eot_id|>", + ) +) + """llama2-zh template source: https://github.com/ymcui/Chinese-LLaMA-Alpaca-2 Supports: https://huggingface.co/ziqingyang/chinese-alpaca-2-7b