Skip to content

Commit

Permalink
style
Browse files (browse the repository at this point in the history)
  • Loading branch information
qgallouedec committed Aug 29, 2024
1 parent d289982 commit d94985a
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions trl/trainer/ppo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class PPOConfig(TrainingArguments):
Initial KL penalty coefficient (used for adaptive and linear control).
kl_penalty (`Literal["kl", "abs", "mse", "full"]`, *optional*, defaults to `"kl"`):
KL penalty options. Possible values are:
- `"kl"`: model_logp - ref_logp
- `"abs"`: abs(kl)
- `"mse"`: mean squared error mse(kl)
Expand Down Expand Up @@ -131,6 +131,7 @@ class PPOConfig(TrainingArguments):
dataset_num_proc (`Optional[int]`, *optional*, defaults to `None`):
Number of processes to use for the dataset.
"""

exp_name: str = os.path.basename(sys.argv[0])[: -len(".py")]
log_with: Optional[Literal["wandb", "tensorboard"]] = None
task_name: Optional[str] = None
Expand Down Expand Up @@ -177,8 +178,6 @@ class PPOConfig(TrainingArguments):
global_batch_size: tyro.conf.Suppress[int] = None
dataset_num_proc: Optional[int] = None



def __post_init__(self):
super().__post_init__()

Expand Down

0 comments on commit d94985a

Please sign in to comment.