Skip to content

Commit 899acbe

Browse files
author
morvanzhou
committed
avoid 0 (add 1e-5 to the denominator of the probability ratio to avoid division by zero)
1 parent 215f31c commit 899acbe

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

contents/12_Proximal_Policy_Optimization/discrete_DPPO.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ def __init__(self):
61 61   a_indices = tf.stack([tf.range(tf.shape(self.tfa)[0], dtype=tf.int32), self.tfa], axis=1)
62 62   pi_prob = tf.gather_nd(params=self.pi, indices=a_indices) # shape=(None, )
63 63   oldpi_prob = tf.gather_nd(params=oldpi, indices=a_indices) # shape=(None, )
64    - ratio = pi_prob/oldpi_prob
   64 + ratio = pi_prob/(oldpi_prob + 1e-5)
65 65   surr = ratio * self.tfadv # surrogate loss
66 66
67 67   self.aloss = -tf.reduce_mean(tf.minimum( # clipped surrogate objective

contents/12_Proximal_Policy_Optimization/simply_PPO.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ def __init__(self):
60 60   with tf.variable_scope('loss'):
61 61   with tf.variable_scope('surrogate'):
62 62   # ratio = tf.exp(pi.log_prob(self.tfa) - oldpi.log_prob(self.tfa))
63    - ratio = pi.prob(self.tfa) / oldpi.prob(self.tfa)
   63 + ratio = pi.prob(self.tfa) / (oldpi.prob(self.tfa) + 1e-5)
64 64   surr = ratio * self.tfadv
65 65   if METHOD['name'] == 'kl_pen':
66 66   self.tflam = tf.placeholder(tf.float32, None, 'lambda')

0 commit comments

Comments
 (0)