
Commit 682e89b

update to tf 1.8.0

Parent: 7c2db0e

File tree

  contents/10_A3C/A3C_RNN.py
  contents/10_A3C/A3C_continuous_action.py
  contents/10_A3C/A3C_discrete_action.py
  experiments/Solve_BipedalWalker/A3C.py
  experiments/Solve_BipedalWalker/A3C_rnn.py

5 files changed: +16 −16 lines


contents/10_A3C/A3C_RNN.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 
 Using:
 tensorflow 1.8.0
-gym 0.8.0
+gym 0.10.5
 """
 
 import multiprocessing

contents/10_A3C/A3C_continuous_action.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 
 Using:
 tensorflow 1.8.0
-gym 0.8.0
+gym 0.10.5
 """
 
 import multiprocessing

contents/10_A3C/A3C_discrete_action.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 
 Using:
 tensorflow 1.8.0
-gym 0.8.0
+gym 0.10.5
 """
 
 import multiprocessing
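Note: each of the three files above changes only the gym version recorded in its docstring (gym 0.8.0 → 0.10.5); the code itself is untouched.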

experiments/Solve_BipedalWalker/A3C.py

Lines changed: 6 additions & 6 deletions
@@ -6,8 +6,8 @@
 View more on [莫烦Python] : https://morvanzhou.github.io/tutorials/
 
 Using:
-tensorflow 1.0
-gym 0.8.0
+tensorflow 1.8.0
+gym 0.10.5
 """
 
 import multiprocessing
@@ -26,7 +26,7 @@
 MAX_GLOBAL_EP = 8000
 GLOBAL_NET_SCOPE = 'Global_Net'
 UPDATE_GLOBAL_ITER = 10
-GAMMA = 0.999
+GAMMA = 0.99
 ENTROPY_BETA = 0.005
 LR_A = 0.00002    # learning rate for actor
 LR_C = 0.0001    # learning rate for critic
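A quick note on the new discount factor: by the usual rule of thumb, the effective lookahead horizon of a discount GAMMA is roughly 1/(1 - GAMMA), so this change shortens it from about 1/(1 - 0.999) = 1000 steps to 1/(1 - 0.99) = 100 steps.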
@@ -76,7 +76,7 @@ def __init__(self, scope, globalAC=None):
                     self.a_loss = tf.reduce_mean(-self.exp_v)
 
                 with tf.name_scope('choose_a'):  # use local params to choose action
-                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND)
+                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1)), *A_BOUND)
                 with tf.name_scope('local_grad'):
                     self.a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
                     self.c_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/critic')
@@ -100,7 +100,7 @@ def _build_net(self):
             sigma = tf.layers.dense(l_a, N_A, tf.nn.softplus, kernel_initializer=w_init, name='sigma')
         with tf.variable_scope('critic'):
             l_c = tf.layers.dense(self.s, 500, tf.nn.relu6, kernel_initializer=w_init, name='lc')
-            l_c = tf.layers.dense(l_c, 200, tf.nn.relu6, kernel_initializer=w_init, name='lc2')
+            l_c = tf.layers.dense(l_c, 300, tf.nn.relu6, kernel_initializer=w_init, name='lc2')
             v = tf.layers.dense(l_c, 1, kernel_initializer=w_init, name='v')  # state value
         return mu, sigma, v
 
@@ -113,7 +113,7 @@ def pull_global(self):  # run by a local
 
     def choose_action(self, s):  # run by a local
         s = s[np.newaxis, :]
-        return SESS.run(self.A, {self.s: s})[0]
+        return SESS.run(self.A, {self.s: s})
 
 
 class Worker(object):
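The two hunks above are one logical change: dropping axis=0 from tf.squeeze removes every size-1 dimension, batch dimension included, so the trailing [0] in choose_action becomes unnecessary. A minimal NumPy sketch of the shape reasoning, assuming the usual (samples, batch, N_A) output of normal_dist.sample(1) and N_A = 4 as in BipedalWalker:

import numpy as np

N_A = 4                           # action dimension (4 for BipedalWalker)
sample = np.zeros((1, 1, N_A))    # stand-in for normal_dist.sample(1) at inference:
                                  # 1 sample x 1 state x N_A action components

old = np.squeeze(sample, axis=0)  # old graph op: drops only the sample dim
assert old.shape == (1, N_A)      # hence the old trailing [0] in choose_action

new = np.squeeze(sample)          # new graph op: drops every size-1 dim
assert new.shape == (N_A,)        # choose_action can now return the result as-is

This relies on choose_action always feeding a single state (s = s[np.newaxis, :]). The same pair of edits is applied to A3C_rnn.py below.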

experiments/Solve_BipedalWalker/A3C_rnn.py

Lines changed: 7 additions & 7 deletions
@@ -6,8 +6,8 @@
 View more on [莫烦Python] : https://morvanzhou.github.io/tutorials/
 
 Using:
-tensorflow 1.0
-gym 0.8.0
+tensorflow 1.8.0
+gym 0.10.5
 """
 
 import multiprocessing
@@ -47,7 +47,7 @@ def __init__(self, scope, globalAC=None):
         if scope == GLOBAL_NET_SCOPE:  # get global network
             with tf.variable_scope(scope):
                 self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
-                self._build_net(N_A)
+                self._build_net()
                 self.a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
                 self.c_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/critic')
         else:  # local net, calculate losses
@@ -56,7 +56,7 @@ def __init__(self, scope, globalAC=None):
                 self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A')
                 self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget')
 
-                mu, sigma, self.v = self._build_net(N_A)
+                mu, sigma, self.v = self._build_net()
 
                 td = tf.subtract(self.v_target, self.v, name='TD_error')
                 with tf.name_scope('c_loss'):
@@ -76,7 +76,7 @@ def __init__(self, scope, globalAC=None):
                     self.a_loss = tf.reduce_mean(-self.exp_v)
 
                 with tf.name_scope('choose_a'):  # use local params to choose action
-                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), A_BOUND[0], A_BOUND[1])
+                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1)), A_BOUND[0], A_BOUND[1])
 
                 with tf.name_scope('local_grad'):
                     self.a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
@@ -94,7 +94,7 @@ def __init__(self, scope, globalAC=None):
             self.update_a_op = OPT_A.apply_gradients(zip(self.a_grads, globalAC.a_params))
             self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params))
 
-    def _build_net(self, n_a):
+    def _build_net(self):
         w_init = tf.random_normal_initializer(0., .01)
         with tf.variable_scope('critic'):  # only critic controls the rnn update
             cell_size = 128
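Note: judging from the call sites updated above (self._build_net(N_A) → self._build_net()) and from the dense layers that read the module-level N_A directly, the n_a argument was never used inside _build_net, so the parameter is dropped along with its arguments.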
@@ -125,7 +125,7 @@ def pull_global(self):  # run by a local
     def choose_action(self, s, cell_state):  # run by a local
         s = s[np.newaxis, :]
         a, cell_state = SESS.run([self.A, self.final_state], {self.s: s, self.init_state: cell_state})
-        return a[0], cell_state
+        return a, cell_state
 
 
 class Worker(object):
