@@ -4,9 +4,9 @@
 
 import numpy as np
 import tensorflow as tf
-import tensorflow.contrib.slim as slim
 
 from ray.experimental.tfutils import TensorFlowVariables
+from ray.rllib.models.ddpgnet import DDPGActor, DDPGCritic
 from ray.rllib.ddpg.random_process import OrnsteinUhlenbeckProcess
 
 
@@ -202,50 +202,20 @@ def _setup_critic_loss(self, action_space):
     def _setup_critic_network(self, obs_space, ac_space):
         """Sets up Q network."""
         with tf.variable_scope("critic", reuse=tf.AUTO_REUSE):
-            self.critic_eval = self._create_critic_network(
-                self.obs, self.act)
+            self.critic_network = DDPGCritic((self.obs, self.act), 1, {})
+            self.critic_eval = self.critic_network.outputs
 
         with tf.variable_scope("critic", reuse=True):
-            tf.get_variable_scope().reuse_variables()
-            self.cn_for_loss = self._create_critic_network(
-                self.obs, self.output_action)
-
-    def _create_critic_network(self, obs, action):
-        """Network for critic."""
-        w_normal = tf.truncated_normal_initializer()
-        w_init = tf.random_uniform_initializer(minval=-0.0003, maxval=0.0003)
-        net = slim.fully_connected(
-            obs, 400, activation_fn=tf.nn.relu, weights_initializer=w_normal)
-        t1 = slim.fully_connected(
-            net, 300, activation_fn=None, biases_initializer=None,
-            weights_initializer=w_normal)
-        t2 = slim.fully_connected(
-            action, 300, activation_fn=None, weights_initializer=w_normal)
-        net = tf.nn.relu(tf.add(t1, t2))
-
-        out = slim.fully_connected(
-            net, 1, activation_fn=None, weights_initializer=w_init)
-        return out
+            self.cn_for_loss = DDPGCritic(
+                (self.obs, self.output_action), 1, {}).outputs
 
     def _setup_actor_network(self, obs_space, ac_space):
         """Sets up actor network."""
         with tf.variable_scope("actor", reuse=tf.AUTO_REUSE):
-            self.output_action = self._create_actor_network(self.obs)
-
-    def _create_actor_network(self, obs):
-        """Network for actor."""
-        w_normal = tf.truncated_normal_initializer()
-        w_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
-
-        net = slim.fully_connected(
-            obs, 400, activation_fn=tf.nn.relu, weights_initializer=w_normal)
-        net = slim.fully_connected(
-            net, 300, activation_fn=tf.nn.relu, weights_initializer=w_normal)
-        out = slim.fully_connected(
-            net, self.ac_size, activation_fn=tf.nn.tanh,
-            weights_initializer=w_init)
-        scaled_out = tf.multiply(out, self.action_bound)
-        return scaled_out
+            self.actor_network = DDPGActor(
+                self.obs, self.ac_size,
+                options={"action_bound": self.action_bound})
+            self.output_action = self.actor_network.outputs
 
     def get_weights(self):
         """Returns critic weights, actor weights."""
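
The `ray.rllib.models.ddpgnet` module this diff now imports is not shown here. As a reading aid, below is a minimal sketch of what `DDPGCritic` might look like if it preserves the architecture of the removed slim code (a 400-unit ReLU layer over the observation, a 300-unit layer merging observation and action features, and a linear Q-value head), rewritten with `tf.layers` since `tensorflow.contrib.slim` is dropped. The class name, constructor arguments, and `.outputs` attribute come from the call sites in the diff; everything else is an assumption.

```python
import tensorflow as tf


class DDPGCritic(object):
    """Hypothetical sketch: maps (obs, action) to a scalar Q-value.

    Reconstructed from the removed _create_critic_network; the real
    ray.rllib.models.ddpgnet implementation may differ.
    """

    def __init__(self, inputs, num_outputs, options):
        # options is unused here; kept for a uniform constructor signature.
        obs, action = inputs
        w_normal = tf.truncated_normal_initializer()
        w_out = tf.random_uniform_initializer(minval=-0.0003, maxval=0.0003)
        net = tf.layers.dense(
            obs, 400, activation=tf.nn.relu, kernel_initializer=w_normal)
        # Merge the state and action pathways at the second layer. t1 drops
        # its bias so the summed pre-activation carries a single bias term,
        # as in the removed slim code.
        t1 = tf.layers.dense(
            net, 300, use_bias=False, kernel_initializer=w_normal)
        t2 = tf.layers.dense(action, 300, kernel_initializer=w_normal)
        net = tf.nn.relu(t1 + t2)
        # Small uniform init keeps the initial Q-estimates close to zero.
        self.outputs = tf.layers.dense(
            net, num_outputs, kernel_initializer=w_out)
```

Because the call sites construct these networks inside `tf.variable_scope("critic", reuse=tf.AUTO_REUSE)` and `tf.variable_scope("critic", reuse=True)`, the second `DDPGCritic` instance shares the first one's weights, which is why the explicit `tf.get_variable_scope().reuse_variables()` call could be removed.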
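By the same reading, `DDPGActor` presumably wraps the removed actor network: two ReLU layers (400 and 300 units) and a tanh output scaled by the action bound passed through `options`. Again a hedged sketch under those assumptions, not the actual module:

```python
import tensorflow as tf


class DDPGActor(object):
    """Hypothetical sketch: maps obs to a bounded continuous action.

    Reconstructed from the removed _create_actor_network; the real
    ray.rllib.models.ddpgnet implementation may differ.
    """

    def __init__(self, inputs, num_outputs, options):
        w_normal = tf.truncated_normal_initializer()
        w_out = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
        net = tf.layers.dense(
            inputs, 400, activation=tf.nn.relu, kernel_initializer=w_normal)
        net = tf.layers.dense(
            net, 300, activation=tf.nn.relu, kernel_initializer=w_normal)
        out = tf.layers.dense(
            net, num_outputs, activation=tf.nn.tanh, kernel_initializer=w_out)
        # tanh bounds the raw output to [-1, 1]; scale to the env's range.
        self.outputs = tf.multiply(out, options["action_bound"])
```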