@@ -53,13 +53,14 @@ def __init__(
53
53
54
54
t_params = tf .get_collection (tf .GraphKeys .GLOBAL_VARIABLES , scope = 'target_net' )
55
55
e_params = tf .get_collection (tf .GraphKeys .GLOBAL_VARIABLES , scope = 'eval_net' )
56
- self .target_replace_op = [tf .assign (t , e ) for t , e in zip (t_params , e_params )]
56
+
57
+ with tf .variable_scope ('soft_replacement' ):
58
+ self .target_replace_op = [tf .assign (t , e ) for t , e in zip (t_params , e_params )]
57
59
58
60
self .sess = tf .Session ()
59
61
60
62
if output_graph :
61
63
# $ tensorboard --logdir=logs
62
- # tf.train.SummaryWriter soon be deprecated, use following
63
64
tf .summary .FileWriter ("logs/" , self .sess .graph )
64
65
65
66
self .sess .run (tf .global_variables_initializer ())
@@ -77,16 +78,16 @@ def _build_net(self):
77
78
# ------------------ build evaluate_net ------------------
78
79
with tf .variable_scope ('eval_net' ):
79
80
e1 = tf .layers .dense (self .s , 20 , tf .nn .relu , kernel_initializer = w_initializer ,
80
- bias_initializer = b_initializer )
81
+ bias_initializer = b_initializer , name = 'e1' )
81
82
self .q_eval = tf .layers .dense (e1 , self .n_actions , kernel_initializer = w_initializer ,
82
- bias_initializer = b_initializer )
83
+ bias_initializer = b_initializer , name = 'q' )
83
84
84
85
# ------------------ build target_net ------------------
85
86
with tf .variable_scope ('target_net' ):
86
87
t1 = tf .layers .dense (self .s_ , 20 , tf .nn .relu , kernel_initializer = w_initializer ,
87
- bias_initializer = b_initializer )
88
+ bias_initializer = b_initializer , name = 't1' )
88
89
self .q_next = tf .layers .dense (t1 , self .n_actions , kernel_initializer = w_initializer ,
89
- bias_initializer = b_initializer )
90
+ bias_initializer = b_initializer , name = 't2' )
90
91
91
92
with tf .variable_scope ('q_target' ):
92
93
q_target = self .r + self .gamma * tf .reduce_max (self .q_next , axis = 1 , name = 'Qmax_s_' ) # shape=(None, )
0 commit comments