
Commit 682e89b

update to tf 1.8.0

Parent: 7c2db0e

File tree

  contents/10_A3C/A3C_RNN.py
  contents/10_A3C/A3C_continuous_action.py
  contents/10_A3C/A3C_discrete_action.py
  experiments/Solve_BipedalWalker/A3C.py
  experiments/Solve_BipedalWalker/A3C_rnn.py

5 files changed: +16 −16 lines


contents/10_A3C/A3C_RNN.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 
 Using:
 tensorflow 1.8.0
-gym 0.8.0
+gym 0.10.5
 """
 
 import multiprocessing

contents/10_A3C/A3C_continuous_action.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 
 Using:
 tensorflow 1.8.0
-gym 0.8.0
+gym 0.10.5
 """
 
 import multiprocessing

contents/10_A3C/A3C_discrete_action.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 
 Using:
 tensorflow 1.8.0
-gym 0.8.0
+gym 0.10.5
 """
 
 import multiprocessing
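Note: each of the three files above changes only the gym version recorded in its docstring (gym 0.8.0 → 0.10.5); the code itself is untouched.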

experiments/Solve_BipedalWalker/A3C.py

Lines changed: 6 additions & 6 deletions
@@ -6,8 +6,8 @@
 View more on [莫烦Python] : https://morvanzhou.github.io/tutorials/
 
 Using:
-tensorflow 1.0
-gym 0.8.0
+tensorflow 1.8.0
+gym 0.10.5
 """
 
 import multiprocessing
@@ -26,7 +26,7 @@
 MAX_GLOBAL_EP = 8000
 GLOBAL_NET_SCOPE = 'Global_Net'
 UPDATE_GLOBAL_ITER = 10
-GAMMA = 0.999
+GAMMA = 0.99
 ENTROPY_BETA = 0.005
 LR_A = 0.00002    # learning rate for actor
 LR_C = 0.0001    # learning rate for critic
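A quick note on the new discount factor: by the usual rule of thumb, the effective lookahead horizon of a discount GAMMA is roughly 1/(1 - GAMMA), so this change shortens it from about 1/(1 - 0.999) = 1000 steps to 1/(1 - 0.99) = 100 steps.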
@@ -76,7 +76,7 @@ def __init__(self, scope, globalAC=None):
                     self.a_loss = tf.reduce_mean(-self.exp_v)
 
                 with tf.name_scope('choose_a'):  # use local params to choose action
-                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), *A_BOUND)
+                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1)), *A_BOUND)
                 with tf.name_scope('local_grad'):
                     self.a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
                     self.c_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/critic')
@@ -100,7 +100,7 @@ def _build_net(self):
             sigma = tf.layers.dense(l_a, N_A, tf.nn.softplus, kernel_initializer=w_init, name='sigma')
         with tf.variable_scope('critic'):
             l_c = tf.layers.dense(self.s, 500, tf.nn.relu6, kernel_initializer=w_init, name='lc')
-            l_c = tf.layers.dense(l_c, 200, tf.nn.relu6, kernel_initializer=w_init, name='lc2')
+            l_c = tf.layers.dense(l_c, 300, tf.nn.relu6, kernel_initializer=w_init, name='lc2')
             v = tf.layers.dense(l_c, 1, kernel_initializer=w_init, name='v')  # state value
         return mu, sigma, v
 
@@ -113,7 +113,7 @@ def pull_global(self):  # run by a local
 
     def choose_action(self, s):  # run by a local
         s = s[np.newaxis, :]
-        return SESS.run(self.A, {self.s: s})[0]
+        return SESS.run(self.A, {self.s: s})
 
 
 class Worker(object):
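The two hunks above are one logical change: dropping axis=0 from tf.squeeze removes every size-1 dimension, batch dimension included, so the trailing [0] in choose_action becomes unnecessary. A minimal NumPy sketch of the shape reasoning, assuming the usual (samples, batch, N_A) output of normal_dist.sample(1) and N_A = 4 as in BipedalWalker:

import numpy as np

N_A = 4                           # action dimension (4 for BipedalWalker)
sample = np.zeros((1, 1, N_A))    # stand-in for normal_dist.sample(1) at inference:
                                  # 1 sample x 1 state x N_A action components

old = np.squeeze(sample, axis=0)  # old graph op: drops only the sample dim
assert old.shape == (1, N_A)      # hence the old trailing [0] in choose_action

new = np.squeeze(sample)          # new graph op: drops every size-1 dim
assert new.shape == (N_A,)        # choose_action can now return the result as-is

This relies on choose_action always feeding a single state (s = s[np.newaxis, :]). The same pair of edits is applied to A3C_rnn.py below.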

experiments/Solve_BipedalWalker/A3C_rnn.py

Lines changed: 7 additions & 7 deletions
@@ -6,8 +6,8 @@
 View more on [莫烦Python] : https://morvanzhou.github.io/tutorials/
 
 Using:
-tensorflow 1.0
-gym 0.8.0
+tensorflow 1.8.0
+gym 0.10.5
 """
 
 import multiprocessing
@@ -47,7 +47,7 @@ def __init__(self, scope, globalAC=None):
         if scope == GLOBAL_NET_SCOPE:  # get global network
             with tf.variable_scope(scope):
                 self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
-                self._build_net(N_A)
+                self._build_net()
                 self.a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
                 self.c_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/critic')
         else:  # local net, calculate losses
@@ -56,7 +56,7 @@ def __init__(self, scope, globalAC=None):
                 self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A')
                 self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget')
 
-                mu, sigma, self.v = self._build_net(N_A)
+                mu, sigma, self.v = self._build_net()
 
                 td = tf.subtract(self.v_target, self.v, name='TD_error')
                 with tf.name_scope('c_loss'):
@@ -76,7 +76,7 @@ def __init__(self, scope, globalAC=None):
                     self.a_loss = tf.reduce_mean(-self.exp_v)
 
                 with tf.name_scope('choose_a'):  # use local params to choose action
-                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1), axis=0), A_BOUND[0], A_BOUND[1])
+                    self.A = tf.clip_by_value(tf.squeeze(normal_dist.sample(1)), A_BOUND[0], A_BOUND[1])
 
                 with tf.name_scope('local_grad'):
                     self.a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
@@ -94,7 +94,7 @@ def __init__(self, scope, globalAC=None):
             self.update_a_op = OPT_A.apply_gradients(zip(self.a_grads, globalAC.a_params))
             self.update_c_op = OPT_C.apply_gradients(zip(self.c_grads, globalAC.c_params))
 
-    def _build_net(self, n_a):
+    def _build_net(self):
         w_init = tf.random_normal_initializer(0., .01)
         with tf.variable_scope('critic'):  # only critic controls the rnn update
             cell_size = 128
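Note: judging from the call sites updated above (self._build_net(N_A) → self._build_net()) and from the dense layers that read the module-level N_A directly, the n_a argument was never used inside _build_net, so the parameter is dropped along with its arguments.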
@@ -125,7 +125,7 @@ def pull_global(self):  # run by a local
     def choose_action(self, s, cell_state):  # run by a local
         s = s[np.newaxis, :]
         a, cell_state = SESS.run([self.A, self.final_state], {self.s: s, self.init_state: cell_state})
-        return a[0], cell_state
+        return a, cell_state
 
 
 class Worker(object):
