Fix reward leak across episode boundaries in discounted-return computation

#8
nikhilbarhate99 · Sep 20, 2019 · 6c9a2ef · 6c9a2ef
1 parent d02da8d
commit 6c9a2ef
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/PPO.py b/PPO.py
@@ -90,10 +90,10 @@ def update(self, memory):
         rewards = []
         discounted_reward = 0
         for reward, is_terminal in zip(reversed(memory.rewards), reversed(memory.is_terminals)):
-            discounted_reward = reward + (self.gamma * discounted_reward)
-            rewards.insert(0, discounted_reward)
             if is_terminal:
                 discounted_reward = 0
+            discounted_reward = reward + (self.gamma * discounted_reward)
+            rewards.insert(0, discounted_reward)
 
         # Normalizing the rewards:
         rewards = torch.tensor(rewards).to(device)