Masking error. With t * valid_mask, masked inf entries produce np.inf * 0 = np.nan (#…
09wakharet authored Jul 12, 2020
1 parent 381c242 commit 3536d8e
Showing 1 changed file with 3 additions and 2 deletions.
rllib/agents/ppo/ppo_torch_policy.py (5 changes: 3 additions & 2 deletions)
@@ -70,7 +70,7 @@ def __init__(self,
             num_valid = torch.sum(valid_mask)
 
             def reduce_mean_valid(t):
-                return torch.sum(t * valid_mask) / num_valid
+                return torch.sum(t[valid_mask]) / num_valid
 
         else:
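
Why this one-line change matters: multiplying by a 0/1 mask sends every masked entry through np.inf * 0, which evaluates to nan and poisons the whole sum, while boolean indexing drops the invalid entries before the reduction ever sees them. Below is a minimal sketch of the difference, assuming a boolean valid_mask as in the padded-RNN code path; the tensor values are made up for illustration and are not from the commit.

```python
import torch

# Toy batch: the last timestep is padding and holds an inf;
# valid_mask marks which timesteps are real.
t = torch.tensor([1.0, 2.0, float("inf")])
valid_mask = torch.tensor([True, True, False])
num_valid = torch.sum(valid_mask)

# Old reduction: inf * 0 = nan, and the nan propagates through the sum.
print(torch.sum(t * valid_mask) / num_valid)  # tensor(nan)

# Fixed reduction: boolean indexing removes the padded entry entirely.
print(torch.sum(t[valid_mask]) / num_valid)   # tensor(1.5000)
```

Both forms compute the same mean when t is finite everywhere; they only diverge when masked positions hold non-finite values, which is exactly the case this commit guards against.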

@@ -195,7 +195,8 @@ def value(ob, prev_action, prev_reward, *state):
                     np.asarray([prev_reward])),
                 "is_training": False,
             }, [convert_to_torch_tensor(np.asarray([s])) for s in state],
-                convert_to_torch_tensor(np.asarray([1])))
+                convert_to_torch_tensor(
+                    np.asarray([1])))
             return self.model.value_function()[0]
 
         else:
