Add a test case for a fractional `num_train_epochs`

huggingface · vwxyzjn · Jun 20, 2024 · Jun 16, 2024 · Jun 17, 2024 · Jun 17, 2024
commit 0eb25ffde209a0ff55def7d6b3fb9033fda926b5
diff --git a/tests/test_ppov2_trainer.py b/tests/test_ppov2_trainer.py
@@ -16,7 +16,7 @@
 
 def test():
     command = """\
-python -i examples/scripts/ppo/ppo.py \
+python examples/scripts/ppo/ppo.py \
     --learning_rate 3e-6 \
     --output_dir models/minimal/ppo \
     --per_device_train_batch_size 5 \
@@ -31,3 +31,22 @@ def test():
         shell=True,
         check=True,
     )
+
+
+def test_num_train_epochs():
+    command = """\
+python examples/scripts/ppo/ppo.py \
+    --learning_rate 3e-6 \
+    --output_dir models/minimal/ppo \
+    --per_device_train_batch_size 5 \
+    --gradient_accumulation_steps 1 \
+    --num_train_epochs 0.003 \
+    --model_name_or_path EleutherAI/pythia-14m \
+    --non_eos_penalty \
+    --stop_token eos \
+"""
+    subprocess.run(
+        command,
+        shell=True,
+        check=True,
+    )
diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py
@@ -18,7 +18,7 @@
 
 def test():
     command = """\
-python -i examples/scripts/rloo/rloo.py \
+python examples/scripts/rloo/rloo.py \
     --learning_rate 3e-6 \
     --output_dir models/minimal/rloo \
     --per_device_train_batch_size 5 \

diff --git a/trl/trainer/ppov2_trainer.py b/trl/trainer/ppov2_trainer.py
@@ -101,7 +101,7 @@ def __init__(
         # calculate various batch sizes
         #########
         if args.total_episodes is None:  # allow the users to define episodes in terms of epochs.
-            args.total_episodes = args.num_train_epochs * self.train_dataset_len
+            args.total_episodes = int(args.num_train_epochs * self.train_dataset_len)
         accelerator = Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps)
         self.accelerator = accelerator
         args.world_size = accelerator.num_processes

diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py
@@ -83,7 +83,7 @@ def __init__(
         # calculate various batch sizes
         #########
         if args.total_episodes is None:  # allow the users to define episodes in terms of epochs.
-            args.total_episodes = args.num_train_epochs * self.train_dataset_len
+            args.total_episodes = int(args.num_train_epochs * self.train_dataset_len)
         accelerator = Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps)
         self.accelerator = accelerator
         args.world_size = accelerator.num_processes