Fix actor update problem: stop the critic's update from flowing into the actor

MorvanZhou · MorvanZhou · commit 7517807a2c60 · 2018-12-27T10:09:05.000+08:00
diff --git a/contents/9_Deep_Deterministic_Policy_Gradient_DDPG/DDPG.py b/contents/9_Deep_Deterministic_Policy_Gradient_DDPG/DDPG.py
@@ -119,7 +119,7 @@ def __init__(self, sess, state_dim, action_dim, learning_rate, gamma, replacemen
 
         with tf.variable_scope('Critic'):
             # Input (s, a), output q
-            self.a = a
+            self.a = tf.stop_gradient(a)    # stop the critic's update from flowing back into the actor
             self.q = self._build_net(S, self.a, 'eval_net', trainable=True)
 
             # Input (s_, a_), output q_ for q_target