We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f3a84f9 · commit b6a2122 · Copy full SHA for b6a2122
berkeley/hw3/dqn.py
@@ -167,7 +167,7 @@ def __init__(
167
action_indices = tf.stack([row_indices, self.act_t_ph], axis=1)
168
yhat = tf.gather_nd(self.q_t, action_indices)
169
170
- q_target = q_func(self.obs_tp1_ph, self.num_actions, scope="target_q_func")
+ q_target = q_func(obs_tp1_float, self.num_actions, scope="target_q_func")
171
max_target_q_val = tf.reduce_max(q_target, axis=-1)
172
y = self.rew_t_ph + gamma * max_target_q_val * (1 - self.done_mask_ph)
173
0 commit comments