
Commit 47f11da

[bug-fix] Fix entropy computation for GaussianDistribution (#3684)

Ervin T authored and vincentpierre committed
1 parent b2ed5d1 · commit 47f11da

5 files changed (+12 -4 lines)


com.unity.ml-agents/CHANGELOG.md

Lines changed: 1 addition & 0 deletions

@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 ### Bug Fixes
 - Raise the wall in CrawlerStatic scene to prevent Agent from falling off. (#3650)
 - Fixed an issue where specifying `vis_encode_type` was required only for SAC. (#3677)
+- Fixed the reported entropy values for continuous actions (#3684)
 - Fixed an issue where switching models using `SetModel()` during training would use an excessive amount of memory. (#3664)
 - Environment subprocesses now close immediately on timeout or wrong API version. (#3679)
 - Fixed an issue in the gym wrapper that would raise an exception if an Agent called EndEpisode multiple times in the same step. (#3700)

ml-agents/mlagents/trainers/distributions.py

Lines changed: 1 addition & 1 deletion

@@ -160,7 +160,7 @@ def _create_entropy(
         self, encoded: "GaussianDistribution.MuSigmaTensors"
     ) -> tf.Tensor:
         single_dim_entropy = 0.5 * tf.reduce_mean(
-            tf.log(2 * np.pi * np.e) + tf.square(encoded.log_sigma)
+            tf.log(2 * np.pi * np.e) + 2 * encoded.log_sigma
         )
         # Make entropy the right shape
         return tf.ones_like(tf.reshape(encoded.mu[:, 0], [-1])) * single_dim_entropy
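
Note on the fix: the per-dimension differential entropy of a Gaussian N(mu, sigma^2) is 0.5 * log(2*pi*e*sigma^2) = 0.5 * (log(2*pi*e) + 2*log(sigma)). The pre-fix code squared log_sigma instead of doubling it, so the reported entropy was wrong for any log(sigma) outside {0, 2}. A minimal sanity check of the corrected closed form, assuming NumPy and SciPy are available (this snippet is illustrative and not part of the commit):

    # Illustrative check only, not part of this commit.
    import numpy as np
    from scipy.stats import norm

    log_sigma = 1.0
    sigma = np.exp(log_sigma)

    # Corrected closed form: H = 0.5 * (log(2*pi*e) + 2 * log_sigma)
    fixed = 0.5 * (np.log(2 * np.pi * np.e) + 2 * log_sigma)

    # Pre-fix expression squared log_sigma instead of doubling it.
    buggy = 0.5 * (np.log(2 * np.pi * np.e) + log_sigma ** 2)

    # Reference: SciPy's entropy for N(0, sigma^2)
    reference = norm(scale=sigma).entropy()

    assert np.isclose(fixed, reference)  # both ~2.4189
    print(fixed, buggy)                  # ~2.4189 vs ~1.9189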

ml-agents/mlagents/trainers/sac/optimizer.py

Lines changed: 0 additions & 1 deletion

@@ -155,7 +155,6 @@ def __init__(self, policy: TFPolicy, trainer_params: Dict[str, Any]):
             "q1_loss": self.q1_loss,
             "q2_loss": self.q2_loss,
             "entropy_coef": self.ent_coef,
-            "entropy": self.policy.entropy,
             "update_batch": self.update_batch_policy,
             "update_value": self.update_batch_value,
             "update_entropy": self.update_batch_entropy,

ml-agents/mlagents/trainers/tests/test_distributions.py

Lines changed: 9 additions & 1 deletion

@@ -53,7 +53,7 @@ def dummy_config():
 
 def test_gaussian_distribution():
     with tf.Graph().as_default():
-        logits = tf.Variable(initial_value=[[0, 0]], trainable=True, dtype=tf.float32)
+        logits = tf.Variable(initial_value=[[1, 1]], trainable=True, dtype=tf.float32)
         distribution = GaussianDistribution(
             logits,
             act_size=VECTOR_ACTION_SPACE,
@@ -71,6 +71,14 @@ def test_gaussian_distribution():
         assert out.shape[1] == VECTOR_ACTION_SPACE[0]
         output = sess.run([distribution.total_log_probs])
         assert output[0].shape[0] == 1
+        # Test entropy is correct
+        log_std_tensor = tf.get_default_graph().get_tensor_by_name(
+            "log_std/BiasAdd:0"
+        )
+        feed_dict = {log_std_tensor: [[1.0, 1.0]]}
+        entropy = sess.run([distribution.entropy], feed_dict=feed_dict)
+        # Entropy with log_std of 1.0 should be 2.42
+        assert pytest.approx(entropy[0], 0.01) == 2.42
 
 
 def test_tanh_distribution():
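
Where the asserted 2.42 comes from: with log(sigma) = 1.0, the entropy is 0.5 * (log(2*pi*e) + 2 * 1.0) ≈ 2.419. Note that at log(sigma) = 0 the buggy and fixed expressions coincide, which is presumably why the test feeds the log_std tensor with 1.0 rather than leaving it at zero. A hypothetical plain-NumPy check, independent of the TF graph in the test above:

    # Hypothetical standalone check; not part of this commit.
    import numpy as np

    const = np.log(2 * np.pi * np.e)           # ~2.8379

    for log_sigma in (0.0, 1.0):
        fixed = 0.5 * (const + 2 * log_sigma)  # corrected formula
        buggy = 0.5 * (const + log_sigma ** 2) # pre-fix formula
        print(log_sigma, fixed, buggy)
    # log_sigma=0.0 -> both ~1.4189 (the bug is invisible here)
    # log_sigma=1.0 -> fixed ~2.4189, buggy ~1.9189 (the test asserts 2.42)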

ml-agents/mlagents/trainers/tests/test_simple_rl.py

Lines changed: 1 addition & 1 deletion

@@ -222,7 +222,7 @@ def test_visual_advanced_ppo(vis_encode_type, num_visual):
 def test_recurrent_ppo(use_discrete):
     env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
     override_vals = {
-        "max_steps": 4000,
+        "max_steps": 5000,
         "batch_size": 64,
         "buffer_size": 128,
         "learning_rate": 1e-3,
