Skip to content

Commit 4134142

Browse files
committed
take the mean of the loss
use the float obs placeholder
1 parent af05392 commit 4134142

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

berkeley/hw3/dqn.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,7 @@ def __init__(
160160
# YOUR CODE HERE
161161

162162
# Formula from "classic deep learning" in http://rail.eecs.berkeley.edu/deeprlcourse/static/slides/lec-8.pdf slide
163-
self.q_t = q_func(self.obs_t_ph, self.num_actions, scope="q")
163+
self.q_t = q_func(obs_t_float, self.num_actions, scope="q")
164164

165165
# select the corresponding action from q_t for yhat
166166
row_indices = tf.range(tf.shape(self.act_t_ph)[0])
@@ -170,11 +170,11 @@ def __init__(
170170
qtarget_tp1 = q_func(self.obs_tp1_ph, self.num_actions, scope="q_target")
171171
y = self.rew_t_ph + gamma * tf.reduce_max(qtarget_tp1, axis=-1) * (1. - self.done_mask_ph)
172172

173+
self.total_error = tf.reduce_mean(huber_loss(yhat - y))
174+
173175
q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "q")
174176
target_q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "q_target")
175177

176-
self.total_error = huber_loss(yhat - y)
177-
178178
######
179179

180180
# construct optimization op (with gradient clipping)

0 commit comments

Comments (0)