Apply deprecated evaluation_strategy (huggingface#1559)
* Deprecate

* Update tests/test_dpo_trainer.py

---------

Co-authored-by: Kashif Rasul <kashif.rasul@gmail.com>
muellerzr and kashif authored May 23, 2024
1 parent 13454d2 commit a02513c
Showing 13 changed files with 57 additions and 57 deletions.
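
The rename is mechanical: every evaluation_strategy keyword argument, and the matching --evaluation_strategy CLI flag in the example commands, becomes eval_strategy, following the deprecation of the old name in transformers.TrainingArguments and the TRL config classes that subclass it. Below is a minimal sketch of the before/after, assuming a transformers release that already accepts the new spelling (roughly v4.41 onward); the output directory and step count are placeholders, not values from this commit.

from transformers import TrainingArguments

# Deprecated spelling replaced throughout this commit (kept here only as a comment):
#   TrainingArguments(output_dir="out", evaluation_strategy="steps", eval_steps=500)

# New spelling:
args = TrainingArguments(
    output_dir="out",        # placeholder output directory
    eval_strategy="steps",   # renamed from evaluation_strategy
    eval_steps=500,          # run evaluation every 500 training steps
)

Scripts that build their arguments with HfArgumentParser derive CLI flag names from these dataclass fields, which is why the shell command in the benchmark script below changes in the same way.
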
2 changes: 1 addition & 1 deletion benchmark/benchmark_level1.sh
@@ -33,7 +33,7 @@ python benchmark/benchmark.py \
--slurm-template-path benchmark/trl.slurm_template

python benchmark/benchmark.py \
--command "python examples/scripts/reward_modeling.py --model_name_or_path=facebook/opt-350m --output_dir="reward_modeling_anthropic_hh" --per_device_train_batch_size=64 --num_train_epochs=1 --gradient_accumulation_steps=16 --gradient_checkpointing=True --learning_rate=1.41e-5 --report_to="wandb" --remove_unused_columns=False --optim="adamw_torch" --logging_steps=10 --evaluation_strategy="steps" --max_length=512" \
--command "python examples/scripts/reward_modeling.py --model_name_or_path=facebook/opt-350m --output_dir="reward_modeling_anthropic_hh" --per_device_train_batch_size=64 --num_train_epochs=1 --gradient_accumulation_steps=16 --gradient_checkpointing=True --learning_rate=1.41e-5 --report_to="wandb" --remove_unused_columns=False --optim="adamw_torch" --logging_steps=10 --eval_strategy="steps" --max_length=512" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
@@ -118,7 +118,7 @@ class ScriptArguments:
per_device_eval_batch_size=script_args.per_device_eval_batch_size,
num_train_epochs=script_args.num_train_epochs,
weight_decay=script_args.weight_decay,
- evaluation_strategy="steps",
+ eval_strategy="steps",
eval_steps=500,
save_strategy="steps",
save_steps=500,
@@ -148,7 +148,7 @@ def run_training(args, train_data, val_data):
training_args = TrainingArguments(
output_dir=args.output_dir,
dataloader_drop_last=True,
- evaluation_strategy="steps",
+ eval_strategy="steps",
max_steps=args.max_steps,
eval_steps=args.eval_freq,
save_steps=args.save_freq,
@@ -186,7 +186,7 @@ def return_prompt_and_responses(samples) -> Dict[str, str]:
gradient_accumulation_steps=script_args.gradient_accumulation_steps,
gradient_checkpointing=script_args.gradient_checkpointing,
learning_rate=script_args.learning_rate,
- evaluation_strategy="steps",
+ eval_strategy="steps",
eval_steps=script_args.eval_steps,
output_dir=script_args.output_dir,
report_to=script_args.report_to,
2 changes: 1 addition & 1 deletion examples/scripts/reward_modeling.py
@@ -24,7 +24,7 @@
--remove_unused_columns=False \
--optim="adamw_torch" \
--logging_steps=10 \
- --evaluation_strategy="steps" \
+ --eval_strategy="steps" \
--eval_steps=500 \
--max_length=512 \
"""
6 changes: 3 additions & 3 deletions tests/slow/test_dpo_slow.py
@@ -67,7 +67,7 @@ def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits):
remove_unused_columns=False,
gradient_accumulation_steps=2,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
fp16=True,
logging_strategy="no",
report_to="none",
@@ -121,7 +121,7 @@ def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_
remove_unused_columns=False,
gradient_accumulation_steps=2,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
fp16=True,
logging_strategy="no",
report_to="none",
@@ -185,7 +185,7 @@ def test_dpo_peft_model_qlora(self, model_id, loss_type, pre_compute_logits, gra
remove_unused_columns=False,
gradient_accumulation_steps=2,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
fp16=True,
logging_strategy="no",
report_to="none",
4 changes: 2 additions & 2 deletions tests/test_cpo_trainer.py
@@ -95,7 +95,7 @@ def test_cpo_trainer(self, name, loss_type):
remove_unused_columns=False,
gradient_accumulation_steps=1,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
loss_type=loss_type,
)
@@ -152,7 +152,7 @@ def test_cpo_trainer_with_lora(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

26 changes: 13 additions & 13 deletions tests/test_dpo_trainer.py
@@ -105,7 +105,7 @@ def test_dpo_trainer(self, name, loss_type, pre_compute):
remove_unused_columns=False,
gradient_accumulation_steps=1,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
loss_type=loss_type,
precompute_ref_log_probs=pre_compute,
@@ -156,7 +156,7 @@ def test_dpo_trainer_without_providing_ref_model(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
precompute_ref_log_probs=True,
)
@@ -206,7 +206,7 @@ def test_dpo_trainer_without_providing_ref_model_with_lora(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
precompute_ref_log_probs=True,
)
@@ -246,7 +246,7 @@ def test_dpo_trainer_padding_token_is_none(self):
remove_unused_columns=False,
gradient_accumulation_steps=1,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

@@ -281,7 +281,7 @@ def test_dpo_trainer_w_dataset_num_proc(self):
remove_unused_columns=False,
gradient_accumulation_steps=1,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
dataset_num_proc=5,
)
@@ -318,7 +318,7 @@ def test_dpo_trainer_generate_during_eval_no_wandb(self):
remove_unused_columns=False,
gradient_accumulation_steps=1,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
generate_during_eval=True,
)
@@ -364,7 +364,7 @@ def test_dpo_lora_save(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
precompute_ref_log_probs=True,
)
@@ -423,7 +423,7 @@ def test_dpo_lora_bf16_autocast_llama(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
bf16=True,
beta=0.1,
generate_during_eval=True,
@@ -495,7 +495,7 @@ def test_dpo_lora_bf16_autocast(self, name, loss_type, pre_compute, gen_during_e
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
bf16=True,
beta=0.1,
generate_during_eval=gen_during_eval,
@@ -548,7 +548,7 @@ def test_dpo_lora_tags(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

@@ -583,7 +583,7 @@ def test_dpo_tags(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

@@ -628,7 +628,7 @@ def test_dpo_lora_force_use_ref(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

@@ -654,7 +654,7 @@ def test_dpo_lora_force_use_ref(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
force_use_ref_model=True,
)
12 changes: 6 additions & 6 deletions tests/test_kto_trainer.py
@@ -105,7 +105,7 @@ def test_kto_trainer(self, name, loss_type, pre_compute, eval_dataset):
remove_unused_columns=False,
gradient_accumulation_steps=1,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
precompute_ref_log_probs=pre_compute,
loss_type=loss_type,
@@ -153,7 +153,7 @@ def test_tokenize_and_process_tokens(self):
remove_unused_columns=False,
gradient_accumulation_steps=1,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

@@ -234,7 +234,7 @@ def test_kto_trainer_without_providing_ref_model(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

@@ -331,7 +331,7 @@ def test_kto_trainer_without_providing_ref_model_with_lora(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

@@ -371,7 +371,7 @@ def test_kto_trainer_generate_during_eval_no_wandb(self):
remove_unused_columns=False,
gradient_accumulation_steps=1,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
generate_during_eval=True,
)
@@ -417,7 +417,7 @@ def test_kto_lora_save(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

4 changes: 2 additions & 2 deletions tests/test_orpo_trainer.py
@@ -88,7 +88,7 @@ def test_orpo_trainer(self, name):
remove_unused_columns=False,
gradient_accumulation_steps=1,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

@@ -144,7 +144,7 @@ def test_orpo_trainer_with_lora(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
beta=0.1,
)

8 changes: 4 additions & 4 deletions tests/test_reward_trainer.py
@@ -47,7 +47,7 @@ def test_reward_trainer(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
)

# fmt: off
@@ -124,7 +124,7 @@ def test_reward_trainer_peft(self):
remove_unused_columns=False,
gradient_accumulation_steps=2,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
)

# fmt: off
@@ -268,7 +268,7 @@ def test_reward_trainer_margin(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
)

# fmt: off
@@ -319,7 +319,7 @@ def test_reward_trainer_tags(self):
remove_unused_columns=False,
gradient_accumulation_steps=4,
learning_rate=9e-1,
- evaluation_strategy="steps",
+ eval_strategy="steps",
)

# fmt: off
2 changes: 1 addition & 1 deletion tests/test_rich_progress_callback.py
@@ -32,7 +32,7 @@ def test_rich_progress_callback_logging(self):
per_device_eval_batch_size=2,
per_device_train_batch_size=2,
num_train_epochs=4,
- evaluation_strategy="steps",
+ eval_strategy="steps",
eval_steps=1,
logging_strategy="steps",
logging_steps=1,