[add-fire] Halve Gaussian entropy (#4319)

Ervin T · web-flow · commit 4759d1fb173d · 2020-08-07T10:48:29.000-07:00
* Halve entropy

* Fix utils test
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_distributions.py b/ml-agents/mlagents/trainers/tests/torch/test_distributions.py
@@ -105,8 +105,8 @@ def test_gaussian_dist_instance():
         assert log_prob == pytest.approx(-0.919, abs=0.01)
 
     for ent in dist_instance.entropy().flatten():
-        # entropy of standard normal at 0
-        assert ent == pytest.approx(2.83, abs=0.01)
+        # entropy of a standard normal (sigma = 1): 1/2 + ln(sqrt(2*pi) * sigma)
+        assert ent == pytest.approx(1.42, abs=0.01)
 
 
 def test_tanh_gaussian_dist_instance():
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
@@ -149,7 +149,7 @@ def test_get_probs_and_entropy():
 
     for ent in entropies.flatten():
         # entropy of standard normal at 0
-        assert ent == pytest.approx(2.83, abs=0.01)
+        assert ent == pytest.approx(1.42, abs=0.01)
 
     # Test continuous
     # Add two dists to the list.
diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py
@@ -66,7 +66,7 @@ def pdf(self, value):
         return torch.exp(log_prob)
 
     def entropy(self):
-        return torch.log(2 * math.pi * math.e * self.std + EPSILON)
+        return 0.5 * torch.log(2 * math.pi * math.e * self.std ** 2 + EPSILON)
 
 
 class TanhGaussianDistInstance(GaussianDistInstance):