From e6810195d01c91615f0359242ae488d76fa6e621 Mon Sep 17 00:00:00 2001
From: tianhaodongbd <137985359+tianhaodongbd@users.noreply.github.com>
Date: Mon, 8 Jul 2024 16:52:25 +0800
Subject: [PATCH] Finetune support use_fast_layer_norm (#8717)

---
 llm/run_finetune.py   | 1 +
 llm/utils/argument.py | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/llm/run_finetune.py b/llm/run_finetune.py
index de31240d2ae3..9e654450c500 100644
--- a/llm/run_finetune.py
+++ b/llm/run_finetune.py
@@ -131,6 +131,7 @@ def main():
     )
 
     LlmMetaConfig.set_llm_config(model_config, training_args)
+    model_config.use_fast_layer_norm = model_args.use_fast_layer_norm
 
     # Config for model using dropout, such as GPT.
     if hasattr(model_config, "hidden_dropout_prob"):
diff --git a/llm/utils/argument.py b/llm/utils/argument.py
index 63a14e4126ef..58566be4132d 100644
--- a/llm/utils/argument.py
+++ b/llm/utils/argument.py
@@ -145,6 +145,10 @@ class ModelArgument:
     tokenizer_name_or_path: Optional[str] = field(
         default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
     )
+    use_fast_layer_norm: bool = field(
+        default=False,
+        metadata={"help": "GPT3 model, use fast layernorm"},
+    )
     fuse_attention_qkv: bool = field(
         default=None, metadata={"help": "whether to fuse attention qkv"},
     )
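
For context, here is a minimal, self-contained sketch of the pattern this patch wires up: a boolean switch is declared as a field on the model-argument dataclass, parsed from the user's configuration, and then copied onto the model config before the model is built. `DummyModelConfig`, `apply_model_args`, and the `__main__` driver are illustrative stand-ins and not names from the PaddleNLP codebase; only `use_fast_layer_norm` and the dataclass field definition come from the patch itself.

```python
# Sketch of the "argument dataclass -> model config" flow added by this patch.
# Only `use_fast_layer_norm` is taken from the patch; the other names are
# hypothetical placeholders used to keep the example self-contained.
from dataclasses import dataclass, field


@dataclass
class ModelArgument:
    use_fast_layer_norm: bool = field(
        default=False,
        metadata={"help": "GPT3 model, use fast layernorm"},
    )


class DummyModelConfig:
    """Stand-in for the pretrained model config object used at model build time."""

    use_fast_layer_norm: bool = False


def apply_model_args(model_config: DummyModelConfig, model_args: ModelArgument) -> None:
    # Mirrors the one-line change in run_finetune.py: the parsed flag is
    # forwarded onto the config that the model implementation reads.
    model_config.use_fast_layer_norm = model_args.use_fast_layer_norm


if __name__ == "__main__":
    args = ModelArgument(use_fast_layer_norm=True)
    config = DummyModelConfig()
    apply_model_args(config, args)
    print(config.use_fast_layer_norm)  # True
```

With the patch applied, setting `use_fast_layer_norm` to true in the finetune configuration should propagate the flag to `model_config`, so models whose implementation checks that attribute can switch to the fast layer-norm kernel; models that ignore the attribute are unaffected.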