Move creation of the target-network replace ops (tf.assign) from learn time to graph building in __init__

MorvanZhou · Morvan Zhou · commit 0f546db82bd5 · 2017-08-15T15:39:13.000+10:00
diff --git a/contents/5.1_Double_DQN/RL_brain.py b/contents/5.1_Double_DQN/RL_brain.py
@@ -47,6 +47,10 @@ def __init__(
         self.learn_step_counter = 0
         self.memory = np.zeros((self.memory_size, n_features*2+2))
         self._build_net()
+        t_params = tf.get_collection('target_net_params')
+        e_params = tf.get_collection('eval_net_params')
+        self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
+
         if sess is None:
             self.sess = tf.Session()
             self.sess.run(tf.global_variables_initializer())
@@ -114,14 +118,9 @@ def choose_action(self, observation):
             action = np.random.randint(0, self.n_actions)
         return action
 
-    def _replace_target_params(self):
-        t_params = tf.get_collection('target_net_params')
-        e_params = tf.get_collection('eval_net_params')
-        self.sess.run([tf.assign(t, e) for t, e in zip(t_params, e_params)])
-
     def learn(self):
         if self.learn_step_counter % self.replace_target_iter == 0:
-            self._replace_target_params()
+            self.sess.run(self.replace_target_op)
             print('\ntarget_params_replaced\n')
 
         if self.memory_counter > self.memory_size:
diff --git a/contents/5.2_Prioritized_Replay_DQN/RL_brain.py b/contents/5.2_Prioritized_Replay_DQN/RL_brain.py
@@ -176,6 +176,9 @@ def __init__(
         self.learn_step_counter = 0
 
         self._build_net()
+        t_params = tf.get_collection('target_net_params')
+        e_params = tf.get_collection('eval_net_params')
+        self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
 
         if self.prioritized:
             self.memory = Memory(capacity=memory_size)
@@ -254,14 +257,9 @@ def choose_action(self, observation):
             action = np.random.randint(0, self.n_actions)
         return action
 
-    def _replace_target_params(self):
-        t_params = tf.get_collection('target_net_params')
-        e_params = tf.get_collection('eval_net_params')
-        self.sess.run([tf.assign(t, e) for t, e in zip(t_params, e_params)])
-
     def learn(self):
         if self.learn_step_counter % self.replace_target_iter == 0:
-            self._replace_target_params()
+            self.sess.run(self.replace_target_op)
             print('\ntarget_params_replaced\n')
 
         if self.prioritized:
diff --git a/contents/5.3_Dueling_DQN/RL_brain.py b/contents/5.3_Dueling_DQN/RL_brain.py
@@ -47,6 +47,10 @@ def __init__(
         self.learn_step_counter = 0
         self.memory = np.zeros((self.memory_size, n_features*2+2))
         self._build_net()
+        t_params = tf.get_collection('target_net_params')
+        e_params = tf.get_collection('eval_net_params')
+        self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
+
         if sess is None:
             self.sess = tf.Session()
             self.sess.run(tf.global_variables_initializer())
@@ -124,14 +128,9 @@ def choose_action(self, observation):
             action = np.random.randint(0, self.n_actions)
         return action
 
-    def _replace_target_params(self):
-        t_params = tf.get_collection('target_net_params')
-        e_params = tf.get_collection('eval_net_params')
-        self.sess.run([tf.assign(t, e) for t, e in zip(t_params, e_params)])
-
     def learn(self):
         if self.learn_step_counter % self.replace_target_iter == 0:
-            self._replace_target_params()
+            self.sess.run(self.replace_target_op)
             print('\ntarget_params_replaced\n')
 
         sample_index = np.random.choice(self.memory_size, size=self.batch_size)
diff --git a/contents/5_Deep_Q_Network/RL_brain.py b/contents/5_Deep_Q_Network/RL_brain.py
@@ -52,6 +52,9 @@ def __init__(
 
         # consist of [target_net, evaluate_net]
         self._build_net()
+        t_params = tf.get_collection('target_net_params')
+        e_params = tf.get_collection('eval_net_params')
+        self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
 
         self.sess = tf.Session()
 
@@ -132,15 +135,10 @@ def choose_action(self, observation):
             action = np.random.randint(0, self.n_actions)
         return action
 
-    def _replace_target_params(self):
-        t_params = tf.get_collection('target_net_params')
-        e_params = tf.get_collection('eval_net_params')
-        self.sess.run([tf.assign(t, e) for t, e in zip(t_params, e_params)])
-
     def learn(self):
         # check to replace target parameters
         if self.learn_step_counter % self.replace_target_iter == 0:
-            self._replace_target_params()
+            self.sess.run(self.replace_target_op)
             print('\ntarget_params_replaced\n')
 
         # sample batch memory from all memory
diff --git a/contents/6_OpenAI_gym/RL_brain.py b/contents/6_OpenAI_gym/RL_brain.py
@@ -52,6 +52,9 @@ def __init__(
 
         # consist of [target_net, evaluate_net]
         self._build_net()
+        t_params = tf.get_collection('target_net_params')
+        e_params = tf.get_collection('eval_net_params')
+        self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
 
         self.sess = tf.Session()
 
@@ -132,15 +135,10 @@ def choose_action(self, observation):
             action = np.random.randint(0, self.n_actions)
         return action
 
-    def _replace_target_params(self):
-        t_params = tf.get_collection('target_net_params')
-        e_params = tf.get_collection('eval_net_params')
-        self.sess.run([tf.assign(t, e) for t, e in zip(t_params, e_params)])
-
     def learn(self):
         # check to replace target parameters
         if self.learn_step_counter % self.replace_target_iter == 0:
-            self._replace_target_params()
+            self.sess.run(self.replace_target_op)
             print('\ntarget_params_replaced\n')
 
         # sample batch memory from all memory