Commit d715846
Message: update
Parent: f108f1c
2 files changed (+6, -8)

2 files changed

+6
-8
lines changed

rl2/atari/dqn_tf.py (+1, -3)
@@ -145,7 +145,7 @@ def get_minibatch(self):
 
 
 class DQN:
-  def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma, scope):
+  def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, scope):
 
     self.K = K
     self.scope = scope
@@ -374,13 +374,11 @@ def smooth(x):
     K=K,
     conv_layer_sizes=conv_layer_sizes,
     hidden_layer_sizes=hidden_layer_sizes,
-    gamma=gamma,
     scope="model")
   target_model = DQN(
     K=K,
     conv_layer_sizes=conv_layer_sizes,
     hidden_layer_sizes=hidden_layer_sizes,
-    gamma=gamma,
     scope="target_model"
   )
   image_transformer = ImageTransformer()
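Dropping the unused `gamma` argument from both `DQN` constructors is consistent with the discount factor being applied where the TD targets are computed, in the training loop rather than inside the network. A minimal sketch of that target computation, assuming a `target_model.predict` API and a boolean `dones` mask (these names are illustrative, not taken from this diff):

import numpy as np

def compute_td_targets(rewards, dones, next_states, target_model, gamma=0.99):
  # Next-state Q-values come from the frozen target network (assumed API).
  next_Qs = target_model.predict(next_states)  # shape (batch_size, K)
  max_next_Q = np.amax(next_Qs, axis=1)
  # Terminal transitions get no bootstrap term.
  return rewards + np.invert(dones) * gamma * max_next_Q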

rl2/atari/dqn_theano.py (+5, -5)
@@ -217,7 +217,7 @@ def forward(self, X):
     return self.f(a)
 
 class DQN:
-  def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma):
+  def __init__(self, K, conv_layer_sizes, hidden_layer_sizes):
     self.K = K
 
     # inputs and targets
@@ -253,7 +253,7 @@ def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma):
     # build fully connected layers
     self.layers = []
     M1 = flattened_ouput_size
-    # print("flattened_ouput_size:", flattened_ouput_size)
+    print("flattened_ouput_size:", flattened_ouput_size)
     for M2 in hidden_layer_sizes:
       layer = HiddenLayer(M1, M2)
       self.layers.append(layer)
@@ -284,6 +284,7 @@ def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma):
     # compile functions
     self.train_op = theano.function(
       inputs=[X, G, actions],
+      outputs=cost,
       updates=updates,
       allow_input_downcast=True
     )
@@ -305,7 +306,7 @@ def predict(self, X):
     return self.predict_op(X)
 
   def update(self, states, actions, targets):
-    self.train_op(states, targets, actions)
+    return self.train_op(states, targets, actions)
 
   def sample_action(self, x, eps):
     if np.random.random() < eps:
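With `outputs=cost` added to the compiled `theano.function`, calling `train_op` now evaluates and returns the cost in the same pass that applies the parameter updates, and `update` passes that value through to the caller. A hedged usage sketch (the `losses` list and `minibatches` iterable are illustrative, not from this commit):

losses = []
for states, actions, targets in minibatches:  # illustrative minibatch source
  # update() now returns the scalar cost, so the loss curve can be tracked
  loss = model.update(states, actions, targets)
  losses.append(float(loss))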
@@ -434,13 +435,11 @@ def smooth(x):
     K=K,
     conv_layer_sizes=conv_layer_sizes,
     hidden_layer_sizes=hidden_layer_sizes,
-    gamma=gamma,
   )
   target_model = DQN(
     K=K,
     conv_layer_sizes=conv_layer_sizes,
     hidden_layer_sizes=hidden_layer_sizes,
-    gamma=gamma,
   )
 
 
@@ -451,6 +450,7 @@ def smooth(x):
 
     action = np.random.choice(K)
    obs, reward, done, _ = env.step(action)
+    obs_small = downsample_image(obs)
     experience_replay_buffer.add_experience(action, obs_small, reward, done)
 
     if done:
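The added `obs_small = downsample_image(obs)` line matters: in the random-play loop that pre-fills the replay buffer, `add_experience` consumes `obs_small`, which was never assigned before this change. For reference, a `downsample_image` for raw 210x160 RGB Atari frames typically looks like the sketch below; the exact crop rows and scale factor are assumptions, not confirmed by this diff:

import numpy as np

def downsample_image(A):
  B = A[31:195]        # crop to the playing field (rows are an assumption)
  B = B.mean(axis=2)   # RGB -> grayscale
  B = B[::2, ::2]      # subsample by 2 in each dimension
  return B.astype(np.uint8)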
