diff --git a/paddlenlp/trainer/training_args.py b/paddlenlp/trainer/training_args.py
index a0ed5d5b10c1..616073273927 100644
--- a/paddlenlp/trainer/training_args.py
+++ b/paddlenlp/trainer/training_args.py
@@ -127,7 +127,7 @@ class TrainingArguments:
             The epsilon hyperparameter for the [`AdamW`] optimizer.
         max_grad_norm (`float`, *optional*, defaults to 1.0):
             Maximum gradient norm (for gradient clipping).
-        num_train_epochs(`float`, *optional*, defaults to 3.0):
+        num_train_epochs(`float`, *optional*, defaults to 1.0):
             Total number of training epochs to perform (if not an integer, will perform the decimal part percents of
             the last epoch before stopping training).
         max_steps (`int`, *optional*, defaults to -1):
@@ -391,7 +391,7 @@ class TrainingArguments:
     adam_epsilon: float = field(default=1e-8, metadata={"help": "Epsilon for AdamW optimizer."})
     max_grad_norm: float = field(default=1.0, metadata={"help": "Max gradient norm."})
-    num_train_epochs: float = field(default=3.0, metadata={"help": "Total number of training epochs to perform."})
+    num_train_epochs: float = field(default=1.0, metadata={"help": "Total number of training epochs to perform."})
     max_steps: int = field(
         default=-1,
         metadata={"help": "If > 0: set total number of training steps to perform. Override num_train_epochs."},
     )
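
For reference, a minimal sketch (not taken from this patch) of how the changed default surfaces when constructing `TrainingArguments` directly. It assumes the class can be instantiated standalone with only `output_dir`; real training scripts usually build it from CLI flags via `PdArgumentParser`.

# Minimal sketch, assuming TrainingArguments(output_dir=...) works standalone.
from paddlenlp.trainer import TrainingArguments

# Omitting num_train_epochs now means 1 training epoch instead of 3.
args = TrainingArguments(output_dir="./checkpoints")
print(args.num_train_epochs)  # 1.0 after this change (previously 3.0)

# Callers relying on the old default must now request it explicitly,
# e.g. --num_train_epochs 3 on the command line, or in code:
args = TrainingArguments(output_dir="./checkpoints", num_train_epochs=3.0)

# max_steps > 0 still overrides num_train_epochs; that behavior is untouched here.
args = TrainingArguments(output_dir="./checkpoints", max_steps=1000)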