From ed30fe0c4db6b85f017c78727fcbef87e338b86e Mon Sep 17 00:00:00 2001
From: shibing624
Date: Mon, 23 Oct 2023 15:40:32 +0800
Subject: [PATCH] update readme.

---
 README.md                |  8 ++++++--
 inference.py             |  4 ++--
 supervised_finetuning.py | 21 +++++++++++++++++++++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index a9aac92..f8caf22 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,8 @@ Supervised Finetuning, RLHF(Reward Modeling and Reinforcement Learning) and DPO(
 - The DPO method comes from the paper [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://arxiv.org/pdf/2305.18290.pdf)
 
 ## 🔥 News
+[2023/10/23] v1.6: added RoPE interpolation to extend the context length of GPT models; added support for [FlashAttention-2](https://github.com/Dao-AILab/flash-attention) and the **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for LLaMA models; added support for [NEFTune](https://github.com/neelsjain/NEFTune), which trains with noise added to the embeddings. See [Release-v1.6](https://github.com/shibing624/MedicalGPT/releases/tag/1.6.0)
+
 [2023/08/28] v1.5: added the [DPO (Direct Preference Optimization)](https://arxiv.org/pdf/2305.18290.pdf) method. DPO achieves precise control over a language model's behavior by optimizing it directly, and can learn human preferences effectively. See [Release-v1.5](https://github.com/shibing624/MedicalGPT/releases/tag/1.5.0)
 
 [2023/08/08] v1.4: released the Chinese-English Vicuna-13B model [shibing624/vicuna-baichuan-13b-chat](https://huggingface.co/shibing624/vicuna-baichuan-13b-chat), fine-tuned on the ShareGPT4 dataset, and the corresponding LoRA model [shibing624/vicuna-baichuan-13b-chat-lora](https://huggingface.co/shibing624/vicuna-baichuan-13b-chat-lora). See [Release-v1.4](https://github.com/shibing624/MedicalGPT/releases/tag/1.4.0)
@@ -196,8 +198,8 @@ CUDA_VISIBLE_DEVICES=0 python inference.py \
 - `--tokenizer_path {tokenizer_path}`: directory holding the matching tokenizer. If omitted, defaults to the value of --base_model
 - `--template_name`: prompt template name, e.g. `vicuna`, `alpaca`. If omitted, defaults to vicuna
 - `--interactive`: start interactive multi-turn Q&A with streaming inference
-- `--data_file {file_name}`: in non-interactive mode, read file_name line by line and predict each line
-- `--predictions_file {file_name}`: in non-interactive mode, write predictions to file_name in JSON format
+- `--data_file {file_name}`: in non-interactive mode, read the contents of file_name and run batch prediction
+- `--output_file {file_name}`: in non-interactive mode, write predictions to file_name in JSONL format
 - `--resize_emb`: whether to resize the embedding size; if not set, the pretrained model's embedding size is kept (default: not resized)
 - `--only_cpu`: run inference on CPU only
 - `--gpus {gpu_ids}`: GPU device IDs to use, default 0. Separate multiple GPUs with commas, e.g. 0,1,2
@@ -330,5 +332,7 @@ CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node 2 inference_multigpu_demo.py
 - [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://arxiv.org/pdf/2305.18290.pdf)
 - [tloen/alpaca-lora](https://github.com/tloen/alpaca-lora/blob/main/finetune.py)
 - [ymcui/Chinese-LLaMA-Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
+- [hiyouga/LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory)
+- [dvlab-research/LongLoRA](https://github.com/dvlab-research/LongLoRA)
 
 Thanks for their great work!
diff --git a/inference.py b/inference.py
index ff97379..4dd6cc1 100644
--- a/inference.py
+++ b/inference.py
@@ -229,7 +229,7 @@ def main():
             history[-1][-1] = response.strip()
     else:
         print("Start inference.")
-        counts = []
+        counts = 0
         if os.path.exists(args.output_file):
             os.remove(args.output_file)
         eval_batch_size = args.eval_batch_size
@@ -251,7 +251,7 @@
                 repetition_penalty=args.repetition_penalty,
             )
             results = []
-            for example, response in enumerate(batch, responses):
+            for example, response in zip(batch, responses):
                 print(f"===")
                 print(f"Input: {example}")
                 print(f"Output: {response}\n")
diff --git a/supervised_finetuning.py b/supervised_finetuning.py
index cfff745..247903e 100644
--- a/supervised_finetuning.py
+++ b/supervised_finetuning.py
@@ -21,6 +21,7 @@
 import os
 from dataclasses import dataclass, field
 from glob import glob
+from types import MethodType
 from typing import Literal, Optional, Tuple, List, Dict, Sequence
 
 import torch
@@ -133,6 +134,10 @@ class ModelArguments:
         default=False,
         metadata={"help": "Enable shift short attention (S^2-Attn) proposed by LongLoRA."}
     )
+    neft_alpha: Optional[float] = field(
+        default=0,
+        metadata={"help": "The alpha parameter controlling the noise magnitude in NEFTune. A typical value is 5."}
+    )
 
     def __post_init__(self):
         if self.model_type is None:
@@ -1167,6 +1172,22 @@ def filter_empty_labels(example):
                 bnb_4bit_compute_dtype=torch_dtype,
             ) if training_args.qlora else None,
         )
+
+        # Apply the NEFTune trick: add uniform noise to the input embeddings for fine-tuning
+        if model_args.neft_alpha > 0:
+            input_embed = model.get_input_embeddings()
+            if isinstance(input_embed, torch.nn.Embedding):
+                def noisy_forward(self: torch.nn.Embedding, x: torch.Tensor) -> torch.Tensor:
+                    embeddings = input_embed.__class__.forward(self, x)  # original embedding lookup
+                    dims = self.num_embeddings * self.embedding_dim
+                    mag_norm = model_args.neft_alpha / (dims ** 0.5)  # noise scale: alpha / sqrt(dims)
+                    embeddings += torch.zeros_like(embeddings).uniform_(-mag_norm, mag_norm)
+                    return embeddings
+
+                input_embed.forward = MethodType(noisy_forward, input_embed)
+                logger.info("Using noisy embedding with alpha={:.2f}".format(model_args.neft_alpha))
+            else:
+                logger.warning("Input embeddings are not a standard nn.Embedding; cannot wrap them with NEFTune noise.")
     else:
         raise ValueError(f"Error, model_name_or_path is None, SFT must be loaded from a pre-trained model")
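
The v1.6 news entry mentions RoPE interpolation for extending context length, but that code path is not shown in this patch. As a rough illustration of the idea only: recent Hugging Face transformers releases (4.31+) expose linear RoPE scaling on LLaMA-style configs through a `rope_scaling` dict. The model name and factor below are assumptions for the sketch, not values from this repository.

```python
from transformers import AutoConfig, AutoModelForCausalLM

# Linear RoPE interpolation: position indices are divided by `factor`,
# so a model pre-trained with a 4k context window can attend over
# roughly 4k * factor tokens after fine-tuning at the longer length.
name = "meta-llama/Llama-2-7b-hf"  # illustrative base model
config = AutoConfig.from_pretrained(name)
config.rope_scaling = {"type": "linear", "factor": 2.0}  # ~8k effective context
model = AutoModelForCausalLM.from_pretrained(name, config=config)
```

Interpolation compresses position resolution at short range in exchange for a proportionally longer usable window; fine-tuning at the stretched length typically recovers most of the lost quality.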
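The NEFTune hunk in supervised_finetuning.py is easier to follow outside the diff. Below is a self-contained sketch of the same monkey-patching trick on a bare `nn.Embedding`; the helper name and alpha value are illustrative. Two caveats: the sketch gates the noise on `self.training` (the NEFTune paper injects noise only while training, whereas the hunk above adds it on every forward pass), and the `num_embeddings * embedding_dim` scale mirrors the patch, while the paper scales by sequence length times embedding dimension.

```python
import torch
from types import MethodType

def add_neftune_noise(embed: torch.nn.Embedding, alpha: float) -> None:
    """Patch embed.forward to add uniform NEFTune-style noise while training."""
    def noisy_forward(self: torch.nn.Embedding, x: torch.Tensor) -> torch.Tensor:
        out = torch.nn.Embedding.forward(self, x)  # original lookup
        if self.training:  # inject noise only in training mode
            mag_norm = alpha / (self.num_embeddings * self.embedding_dim) ** 0.5
            out = out + torch.zeros_like(out).uniform_(-mag_norm, mag_norm)
        return out
    embed.forward = MethodType(noisy_forward, embed)  # bind to this instance

# Illustrative usage on a toy embedding (alpha=5 matches the help text's suggestion)
emb = torch.nn.Embedding(32000, 4096)
add_neftune_noise(emb, alpha=5.0)
tokens = torch.randint(0, 32000, (2, 16))
emb.train()
noisy = emb(tokens)   # noisy lookup
emb.eval()
clean = emb(tokens)   # plain lookup
```

Because `nn.Module.__call__` resolves `self.forward` through the instance, assigning a bound method on the instance is enough to override the lookup without subclassing, which is exactly why the patch imports `MethodType`.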