Fixed multiprocess GAIL and cleaned up model_rewards

GeorgeGu · Nov 11, 2023 · 37fcdda · 37fcdda
1 parent 5fe7cfc
commit 37fcdda
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 4 deletions.
diff --git a/src/tril/algorithms/gail.py b/src/tril/algorithms/gail.py
@@ -116,7 +116,7 @@ def update_buffer(self):
         terminal_rewards = torch.cat(all_scores, dim=0)
         seq_lens = self.buffer.masks.sum(axis=-1)
         self.buffer.rewards = torch.zeros(
-            (self.buffer.total_num_traj, self.max_gen_len), dtype=torch.float32
+            (self.trajectories_per_update, self.max_gen_len), dtype=torch.float32
         )
         for reward, length in zip(terminal_rewards, seq_lens):
             self.buffer.rewards[:, int(length - 1)] = reward
@@ -179,7 +179,6 @@ def discriminator_step(self):
             ) as pbar:
                 for batch_ix, rollout_data in enumerate(self.buffer_dataloader):
                     with self.accelerator.accumulate():
-                        # expert_data = next(self.expert_sampler)
                         # NOTE: we could just grab it from rollout_data.target_ids
 
                         chosen_tokens = rollout_data.observations.to(

diff --git a/src/tril/rewards/model_rewards.py b/src/tril/rewards/model_rewards.py
@@ -27,8 +27,7 @@ def __init__(
         self._metric_tokenizer.truncation_side = "left"
         self._metric_model = AutoModelForSequenceClassification.from_pretrained(
             model_name
-        )  # .to(self._accelerator.device)
-        # self._accelerator.prepare(self._metric_model)
+        )
         self._label_ix = label_ix
         self._include_prompt_for_eval = include_prompt_for_eval