Fixed SAC discrete error that meant we weren't taking the mean of the actor loss
qpc001 · Jul 19, 2019 · 9cf68ff · 9cf68ff
1 parent 3cd76da
commit 9cf68ff
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/agents/actor_critic_agents/SAC_Discrete.py b/agents/actor_critic_agents/SAC_Discrete.py
@@ -81,7 +81,7 @@ def calculate_actor_loss(self, state_batch):
         qf2_pi = self.critic_local_2(state_batch)
         min_qf_pi = torch.min(qf1_pi, qf2_pi)
         inside_term = self.alpha * log_action_probabilities - min_qf_pi
-        policy_loss = torch.sum(action_probabilities * inside_term)
+        policy_loss = action_probabilities * inside_term
         policy_loss = policy_loss.mean()
         log_action_probabilities = log_action_probabilities.gather(1, action.unsqueeze(-1).long())
         return policy_loss, log_action_probabilities