Finetune: support use_fast_layer_norm (#8717)

PaddlePaddle · Jul 8, 2024 · e681019
1 parent bdd2287
commit e681019
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 0 deletions.
diff --git a/llm/run_finetune.py b/llm/run_finetune.py
@@ -131,6 +131,7 @@ def main():
     )
 
     LlmMetaConfig.set_llm_config(model_config, training_args)
+    model_config.use_fast_layer_norm = model_args.use_fast_layer_norm
 
     # Config for model using dropout, such as GPT.
     if hasattr(model_config, "hidden_dropout_prob"):

diff --git a/llm/utils/argument.py b/llm/utils/argument.py
@@ -145,6 +145,10 @@ class ModelArgument:
     tokenizer_name_or_path: Optional[str] = field(
         default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
     )
+    use_fast_layer_norm: bool = field(
+        default=False,
+        metadata={"help": "GPT3 model, use fast layernorm"},
+    )
     fuse_attention_qkv: bool = field(
         default=None,
         metadata={"help": "whether to fuse attention qkv"},