Clean configs documentation #1944

Merged

81 commits merged into main from clean-config on Sep 4, 2024
Changes from 1 commit (of 81)

Commits:
c2d9a62
Clean BCO
qgallouedec Aug 18, 2024
e3083f1
Optional[int]
qgallouedec Aug 18, 2024
c7b2fbc
fix sft config
qgallouedec Aug 19, 2024
e7a80bb
Merge branch 'main' into clean-config
qgallouedec Aug 19, 2024
50dbc86
alignprop config
qgallouedec Aug 20, 2024
b718fba
Merge branch 'main' into clean-config
qgallouedec Aug 20, 2024
4a8aba6
update tempfile to work with output_dir
qgallouedec Aug 20, 2024
6ae94e9
Merge branch 'clean-config' of https://github.com/huggingface/trl int…
qgallouedec Aug 20, 2024
3ed49fd
Merge branch 'main' into clean-config
qgallouedec Aug 21, 2024
f847f56
clean kto config
qgallouedec Aug 21, 2024
69525f9
intro docstring
qgallouedec Aug 21, 2024
c73f43a
style
qgallouedec Aug 21, 2024
11f6e7e
reward config
qgallouedec Aug 22, 2024
946e2e5
orpo config
qgallouedec Aug 22, 2024
21df122
Merge branch 'main' into clean-config
qgallouedec Aug 26, 2024
a1bff9c
warning in trainer, not in config
qgallouedec Aug 26, 2024
006a454
cpo config
qgallouedec Aug 26, 2024
c9264ee
Merge branch 'main' into clean-config
qgallouedec Aug 27, 2024
01d8814
ppo v2
qgallouedec Aug 27, 2024
5cd9eef
Merge branch 'clean-config' of https://github.com/huggingface/trl int…
qgallouedec Aug 27, 2024
9bef508
model config
qgallouedec Aug 27, 2024
0a49bca
ddpo and per_device_train_batch_size (instead of train_batch_size)
qgallouedec Aug 27, 2024
1c9bba7
Merge branch 'main' into clean-config
qgallouedec Aug 27, 2024
216856a
rloo
qgallouedec Aug 27, 2024
7270936
Online config
qgallouedec Aug 27, 2024
05bacaf
tmp_dir in test_ddpo
qgallouedec Aug 27, 2024
451b4fc
style
qgallouedec Aug 27, 2024
9e6f0a0
remove to_dict and fix post-init
qgallouedec Aug 28, 2024
2aa4544
batch size in test ddpo
qgallouedec Aug 28, 2024
97738c8
Merge branch 'main' into clean-config
qgallouedec Aug 28, 2024
098ca6a
Merge branch 'main' into clean-config
qgallouedec Aug 28, 2024
02b78ec
dpo
qgallouedec Aug 28, 2024
92ff078
style
qgallouedec Aug 28, 2024
63679fe
Merge branch 'main' into clean-config
qgallouedec Aug 29, 2024
4957a8c
`Args` -> `Parameters`
qgallouedec Aug 29, 2024
bd3693b
parameters
qgallouedec Aug 29, 2024
10468e9
ppo config
qgallouedec Aug 29, 2024
d289982
don't overwrite world size
qgallouedec Aug 29, 2024
d94985a
style
qgallouedec Aug 29, 2024
1bc063a
Merge branch 'main' into clean-config
qgallouedec Aug 29, 2024
00d2faf
outputdir in test ppo
qgallouedec Aug 29, 2024
aa98e42
output dir in ppo config
qgallouedec Aug 29, 2024
66dc235
Merge branch 'clean-config' of https://github.com/huggingface/trl int…
qgallouedec Aug 29, 2024
79234d1
revert non-core change (1/n)
qgallouedec Sep 3, 2024
9b3b3a7
revert non-core changes (2/n)
qgallouedec Sep 3, 2024
6aeba64
revert non-core change (3/n)
qgallouedec Sep 3, 2024
fc4d223
Merge branch 'main' into clean-config
qgallouedec Sep 3, 2024
23fbfc6
uniform max_length
qgallouedec Sep 3, 2024
136cfdc
fix uniform max_length
qgallouedec Sep 3, 2024
640999c
beta uniform
qgallouedec Sep 3, 2024
3d5618c
Merge branch 'clean-config' of https://github.com/huggingface/trl int…
qgallouedec Sep 3, 2024
cfe9b22
style
qgallouedec Sep 3, 2024
358b026
link to `ConstantLengthDataset`
qgallouedec Sep 3, 2024
2190bf1
uniform `dataset_num_proc`
qgallouedec Sep 3, 2024
5434969
uniform `disable_dropout`
qgallouedec Sep 3, 2024
a7e537a
`eval_packing` doc
qgallouedec Sep 3, 2024
1a86078
try latex and α in doc
qgallouedec Sep 3, 2024
7065562
try title first
qgallouedec Sep 3, 2024
2d93d3d
doesn't work
qgallouedec Sep 3, 2024
42acd10
reorganize doc
qgallouedec Sep 3, 2024
92a2206
overview
qgallouedec Sep 3, 2024
81d5147
better latex
qgallouedec Sep 3, 2024
71c110a
is_encoder_decoder uniform
qgallouedec Sep 3, 2024
e60c3b0
proper ticks
qgallouedec Sep 3, 2024
a964090
fix latex
qgallouedec Sep 3, 2024
45d4f99
uniform generate_during_eval
qgallouedec Sep 3, 2024
3bc2d30
uniform truncation_mode
qgallouedec Sep 3, 2024
66a4861
ref_model_mixup_alpha
qgallouedec Sep 3, 2024
e2d8f7f
ref_model_mixup_alpha and ref_model_sync_steps
qgallouedec Sep 3, 2024
79347d9
Uniform `model_init_kwargs` and `ref_model_init_kwargs`
qgallouedec Sep 3, 2024
9ba37a9
rpo_alpha
qgallouedec Sep 3, 2024
52f69b1
Update maximum length argument names in config files
qgallouedec Sep 3, 2024
0fabc42
Update loss_type descriptions in config files
qgallouedec Sep 3, 2024
e1abc3a
Update max_target_length to max_completion_length in CPOConfig and CP…
qgallouedec Sep 3, 2024
d618f0c
Update padding value in config files
qgallouedec Sep 3, 2024
594677c
Update precompute_ref_log_probs flag documentation
qgallouedec Sep 3, 2024
5dee9ab
Fix typos and update comments in dpo_config.py and sft_config.py
qgallouedec Sep 3, 2024
47431f8
Merge branch 'main' into clean-config
qgallouedec Sep 4, 2024
19af1fa
post init warning for `max_target_length`
qgallouedec Sep 4, 2024
34b38b0
Merge branch 'clean-config' of https://github.com/huggingface/trl int…
qgallouedec Sep 4, 2024
07c9cab
Merge branch 'main' into clean-config
qgallouedec Sep 4, 2024
revert non-core change (1/n)
qgallouedec committed Sep 3, 2024
commit 79234d1fe145124bdd1e9ff6dad9575f7cb1185d
19 changes: 9 additions & 10 deletions docs/source/customization.mdx
@@ -60,7 +60,7 @@ ref_model = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
 tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 
 # 2. define config
-ppo_config = {'output_dir': 'output_dir', 'batch_size': 1, 'learning_rate':1e-5}
+ppo_config = {'batch_size': 1, 'learning_rate':1e-5}
 config = PPOConfig(**ppo_config)
 
 
@@ -87,7 +87,7 @@ ref_model = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
 tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 
 # 2. define config
-ppo_config = {'output_dir': 'output_dir', 'batch_size': 1, 'learning_rate':1e-5}
+ppo_config = {'batch_size': 1, 'learning_rate':1e-5}
 config = PPOConfig(**ppo_config)
 
 
@@ -128,7 +128,7 @@ ref_model = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
 tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 
 # 2. define config
-ppo_config = {'output_dir': 'output_dir', 'batch_size': 1, 'learning_rate':1e-5}
+ppo_config = {'batch_size': 1, 'learning_rate':1e-5}
 config = PPOConfig(**ppo_config)
 
 
@@ -154,7 +154,7 @@ ref_model = create_reference_model(model, num_shared_layers=6)
 tokenizer = AutoTokenizer.from_pretrained('bigscience/bloom-560m')
 
 # 2. initialize trainer
-ppo_config = {'output_dir': 'output_dir', 'batch_size': 1}
+ppo_config = {'batch_size': 1}
 config = PPOConfig(**ppo_config)
 ppo_trainer = PPOTrainer(config, model, ref_model, tokenizer)
 ```
@@ -182,7 +182,7 @@ ref_model = AutoModelForCausalLMWithValueHead.from_pretrained('bigscience/bloom-
 tokenizer = AutoTokenizer.from_pretrained('bigscience/bloom-560m')
 
 # 2. initialize trainer
-ppo_config = {'output_dir': 'output_dir', 'batch_size': 1}
+ppo_config = {'batch_size': 1}
 config = PPOConfig(**ppo_config)
 ppo_trainer = PPOTrainer(config, model, ref_model, tokenizer)
 ```
@@ -203,15 +203,14 @@ As suggested by [Secrets of RLHF in Large Language Models Part I: PPO](https://h
 from trl import PPOConfig
 
 ppo_config = {
-    'output_dir': 'output_dir',
-    'use_score_scaling': True,
-    'use_score_norm': True,
-    'score_clip': 0.5,
+    use_score_scaling=True,
+    use_score_norm=True,
+    score_clip=0.5,
 }
 config = PPOConfig(**ppo_config)
 ```
 
 To run `ppo.py`, you can use the following command:
 ```
-python examples/scripts/ppo.py --output_dir output_dir --log_with wandb --use_score_scaling --use_score_norm --score_clip 0.5
+python examples/scripts/ppo.py --log_with wandb --use_score_scaling --use_score_norm --score_clip 0.5
 ```
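For orientation, the three flags touched in the hunk above shape the raw reward-model scores before the PPO update. The sketch below is illustrative only, not TRL's code: the function name `shape_rewards`, the running-statistics arguments, and the epsilon term are assumptions; in TRL the trainer tracks the running mean/std internally.

```python
import torch

def shape_rewards(
    scores: torch.Tensor,
    running_mean: float,
    running_std: float,
    use_score_scaling: bool = True,
    use_score_norm: bool = True,
    score_clip: float = 0.5,
) -> torch.Tensor:
    """Rough sketch of score scaling/normalization/clipping for PPO rewards."""
    if use_score_scaling:
        if use_score_norm:
            # Center and scale the scores with running statistics.
            scores = (scores - running_mean) / (running_std + 1e-8)
        else:
            # Scale only, keeping the sign and relative magnitude of scores.
            scores = scores / (running_std + 1e-8)
    if score_clip is not None and score_clip > 0:
        # Clip shaped scores into [-score_clip, score_clip].
        scores = scores.clamp(-score_clip, score_clip)
    return scores

# Example: shape a small batch of raw reward-model scores.
raw = torch.tensor([2.0, -1.0, 0.5])
print(shape_rewards(raw, running_mean=0.2, running_std=1.5))
```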
4 changes: 2 additions & 2 deletions docs/source/ddpo_trainer.mdx
@@ -41,8 +41,8 @@ To obtain the documentation of `stable_diffusion_tuning.py`, please run `python
 
 The following are things to keep in mind (The code checks this for you as well) in general while configuring the trainer (beyond the use case of using the example script)
 
-- The configurable sample batch size (`--ddpo_config.sample_batch_size=6`) should be greater than or equal to the configurable training batch size (`--ddpo_config.per_device_train_batch_size=3`)
-- The configurable sample batch size (`--ddpo_config.sample_batch_size=6`) must be divisible by the configurable train batch size (`--ddpo_config.per_device_train_batch_size=3`)
+- The configurable sample batch size (`--ddpo_config.sample_batch_size=6`) should be greater than or equal to the configurable training batch size (`--ddpo_config.train_batch_size=3`)
+- The configurable sample batch size (`--ddpo_config.sample_batch_size=6`) must be divisible by the configurable train batch size (`--ddpo_config.train_batch_size=3`)
 - The configurable sample batch size (`--ddpo_config.sample_batch_size=6`) must be divisible by both the configurable gradient accumulation steps (`--ddpo_config.train_gradient_accumulation_steps=1`) and the configurable accelerator processes count
 
 ## Setting up the image logging hook function
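The constraints in the bullets above reduce to a few arithmetic checks. Here is a minimal sketch of them; `check_ddpo_batch_config` is a hypothetical helper for illustration, not DDPOConfig's actual validation logic.

```python
def check_ddpo_batch_config(
    sample_batch_size: int,
    train_batch_size: int,
    train_gradient_accumulation_steps: int,
    num_processes: int,
) -> None:
    # Hypothetical helper mirroring the documented constraints; not TRL code.
    # The sample batch size must be at least as large as the train batch size...
    assert sample_batch_size >= train_batch_size
    # ...and evenly divisible by it.
    assert sample_batch_size % train_batch_size == 0
    # It must also divide evenly across the gradient accumulation steps...
    assert sample_batch_size % train_gradient_accumulation_steps == 0
    # ...and across the accelerator processes.
    assert sample_batch_size % num_processes == 0

# The values used in the docs above pass all four checks.
check_ddpo_batch_config(6, 3, 1, 1)
```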
1 change: 0 additions & 1 deletion docs/source/logging.mdx
@@ -6,7 +6,6 @@ By default, the TRL [`PPOTrainer`] saves a lot of relevant information to `wandb
 Upon initialization, pass one of these two options to the [`PPOConfig`]:
 ```
 config = PPOConfig(
-    output_dir="output_dir",
     model_name=args.model_name,
     log_with=`wandb`, # or `tensorboard`
 )
1 change: 0 additions & 1 deletion docs/source/ppo_trainer.mdx
@@ -61,7 +61,6 @@ The `PPOConfig` dataclass controls all the hyperparameters and settings for the
 from trl import PPOConfig
 
 config = PPOConfig(
-    output_dir="output_dir",
     model_name="gpt2",
     learning_rate=1.41e-5,
 )
2 changes: 1 addition & 1 deletion docs/source/quickstart.mdx
@@ -30,7 +30,7 @@ tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 tokenizer.pad_token = tokenizer.eos_token
 
 # 2. initialize trainer
-ppo_config = {"output_dir": "output_dir", "mini_batch_size": 1, "batch_size": 1}
+ppo_config = {"mini_batch_size": 1, "batch_size": 1}
 config = PPOConfig(**ppo_config)
 ppo_trainer = PPOTrainer(config, model, ref_model, tokenizer)
 
2 changes: 1 addition & 1 deletion examples/notebooks/gpt2-sentiment-control.ipynb
@@ -99,7 +99,7 @@
 "sentiment_pipe_kwargs = {\"top_k\": None, \"function_to_apply\": \"none\"}\n",
 "\n",
 "config = PPOConfig(\n",
-"    output_dir =\"output_dir\", model_name=\"lvwerra/gpt2-imdb\", steps=51200, learning_rate=1.41e-5, remove_unused_columns=False, log_with=\"wandb\"\n",
+"    model_name=\"lvwerra/gpt2-imdb\", steps=51200, learning_rate=1.41e-5, remove_unused_columns=False, log_with=\"wandb\"\n",
 ")\n",
 "\n",
 "txt_in_len = 5\n",
1 change: 0 additions & 1 deletion examples/notebooks/gpt2-sentiment.ipynb
@@ -87,7 +87,6 @@
 "outputs": [],
 "source": [
 "config = PPOConfig(\n",
-"    output_dir=\"output_dir\", \n",
 "    model_name=\"lvwerra/gpt2-imdb\",\n",
 "    learning_rate=1.41e-5,\n",
 "    log_with=\"wandb\",\n",
2 changes: 1 addition & 1 deletion examples/scripts/alignprop.py
@@ -19,7 +19,7 @@
     --num_epochs=20 \
     --train_gradient_accumulation_steps=4 \
     --sample_num_steps=50 \
-    --per_device_train_batch_size=8 \
+    --train_batch_size=8 \
     --tracker_project_name="stable_diffusion_training" \
     --log_with="wandb"
 
2 changes: 1 addition & 1 deletion examples/scripts/ddpo.py
@@ -17,7 +17,7 @@
     --train_gradient_accumulation_steps=1 \
     --sample_num_steps=50 \
     --sample_batch_size=6 \
-    --per_device_train_batch_size=3 \
+    --train_batch_size=3 \
     --sample_num_batches_per_epoch=4 \
     --per_prompt_stat_tracking=True \
     --per_prompt_stat_tracking_buffer_size=32 \
84 changes: 41 additions & 43 deletions tests/test_alignprop_trainer.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import gc
-import tempfile
 import unittest
 
 import torch
@@ -41,53 +40,52 @@ class AlignPropTrainerTester(unittest.TestCase):
     Test the AlignPropTrainer class.
     """
 
+    def setUp(self):
+        alignprop_config = AlignPropConfig(
+            num_epochs=2,
+            train_gradient_accumulation_steps=1,
+            train_batch_size=2,
+            truncated_backprop_rand=False,
+            mixed_precision=None,
+            save_freq=1000000,
+        )
+        pretrained_model = "hf-internal-testing/tiny-stable-diffusion-torch"
+        pretrained_revision = "main"
+        pipeline_with_lora = DefaultDDPOStableDiffusionPipeline(
+            pretrained_model, pretrained_model_revision=pretrained_revision, use_lora=True
+        )
+        pipeline_without_lora = DefaultDDPOStableDiffusionPipeline(
+            pretrained_model, pretrained_model_revision=pretrained_revision, use_lora=False
+        )
+        self.trainer_with_lora = AlignPropTrainer(
+            alignprop_config, scorer_function, prompt_function, pipeline_with_lora
+        )
+        self.trainer_without_lora = AlignPropTrainer(
+            alignprop_config, scorer_function, prompt_function, pipeline_without_lora
+        )
+
+    def tearDown(self) -> None:
+        gc.collect()
+
     @parameterized.expand([True, False])
     def test_generate_samples(self, use_lora):
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            alignprop_config = AlignPropConfig(
-                output_dir=tmp_dir,
-                num_epochs=2,
-                train_gradient_accumulation_steps=1,
-                per_device_train_batch_size=2,
-                truncated_backprop_rand=False,
-                mixed_precision=None,
-                save_freq=1000000,
-            )
-            pretrained_model = "hf-internal-testing/tiny-stable-diffusion-torch"
-            pipeline = DefaultDDPOStableDiffusionPipeline(pretrained_model, use_lora=use_lora)
-            trainer = AlignPropTrainer(alignprop_config, scorer_function, prompt_function, pipeline)
-            output_pairs = trainer._generate_samples(2, with_grad=True)
-            assert len(output_pairs.keys()) == 3
-            assert len(output_pairs["images"]) == 2
+        trainer = self.trainer_with_lora if use_lora else self.trainer_without_lora
+        output_pairs = trainer._generate_samples(2, with_grad=True)
+        assert len(output_pairs.keys()) == 3
+        assert len(output_pairs["images"]) == 2
 
     @parameterized.expand([True, False])
     def test_calculate_loss(self, use_lora):
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            alignprop_config = AlignPropConfig(
-                output_dir=tmp_dir,
-                num_epochs=2,
-                train_gradient_accumulation_steps=1,
-                per_device_train_batch_size=2,
-                truncated_backprop_rand=False,
-                mixed_precision=None,
-                save_freq=1000000,
-            )
-            pretrained_model = "hf-internal-testing/tiny-stable-diffusion-torch"
-            pipeline = DefaultDDPOStableDiffusionPipeline(pretrained_model, use_lora=use_lora)
-            trainer = AlignPropTrainer(alignprop_config, scorer_function, prompt_function, pipeline)
-
-            sample = trainer._generate_samples(2)
-
-            images = sample["images"]
-            prompts = sample["prompts"]
-
-            assert images.shape == (2, 3, 128, 128)
-            assert len(prompts) == 2
-
-            rewards = trainer.compute_rewards(sample)
-            loss = trainer.calculate_loss(rewards)
-
-            assert torch.isfinite(loss.cpu())
+        trainer = self.trainer_with_lora if use_lora else self.trainer_without_lora
+        sample = trainer._generate_samples(2)
+
+        images = sample["images"]
+        prompts = sample["prompts"]
+
+        assert images.shape == (2, 3, 128, 128)
+        assert len(prompts) == 2
+
+        rewards = trainer.compute_rewards(sample)
+        loss = trainer.calculate_loss(rewards)
+
+        assert torch.isfinite(loss.cpu())