Commit d715846
Message: update
Parent: f108f1c
2 files changed (+6, -8)

2 files changed

+6
-8
lines changed

rl2/atari/dqn_tf.py (+1, -3)
@@ -145,7 +145,7 @@ def get_minibatch(self):
 
 
 class DQN:
-  def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma, scope):
+  def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, scope):
 
     self.K = K
     self.scope = scope
@@ -374,13 +374,11 @@ def smooth(x):
     K=K,
     conv_layer_sizes=conv_layer_sizes,
     hidden_layer_sizes=hidden_layer_sizes,
-    gamma=gamma,
     scope="model")
   target_model = DQN(
     K=K,
     conv_layer_sizes=conv_layer_sizes,
     hidden_layer_sizes=hidden_layer_sizes,
-    gamma=gamma,
     scope="target_model"
   )
   image_transformer = ImageTransformer()
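Dropping the unused `gamma` argument from both `DQN` constructors is consistent with the discount factor being applied where the TD targets are computed, in the training loop rather than inside the network. A minimal sketch of that target computation, assuming a `target_model.predict` API and a boolean `dones` mask (these names are illustrative, not taken from this diff):

import numpy as np

def compute_td_targets(rewards, dones, next_states, target_model, gamma=0.99):
  # Next-state Q-values come from the frozen target network (assumed API).
  next_Qs = target_model.predict(next_states)  # shape (batch_size, K)
  max_next_Q = np.amax(next_Qs, axis=1)
  # Terminal transitions get no bootstrap term.
  return rewards + np.invert(dones) * gamma * max_next_Q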

rl2/atari/dqn_theano.py (+5, -5)
@@ -217,7 +217,7 @@ def forward(self, X):
     return self.f(a)
 
 class DQN:
-  def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma):
+  def __init__(self, K, conv_layer_sizes, hidden_layer_sizes):
     self.K = K
 
     # inputs and targets
@@ -253,7 +253,7 @@ def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma):
     # build fully connected layers
     self.layers = []
     M1 = flattened_ouput_size
-    # print("flattened_ouput_size:", flattened_ouput_size)
+    print("flattened_ouput_size:", flattened_ouput_size)
     for M2 in hidden_layer_sizes:
       layer = HiddenLayer(M1, M2)
       self.layers.append(layer)
@@ -284,6 +284,7 @@ def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma):
     # compile functions
     self.train_op = theano.function(
       inputs=[X, G, actions],
+      outputs=cost,
       updates=updates,
       allow_input_downcast=True
     )
@@ -305,7 +306,7 @@ def predict(self, X):
     return self.predict_op(X)
 
   def update(self, states, actions, targets):
-    self.train_op(states, targets, actions)
+    return self.train_op(states, targets, actions)
 
   def sample_action(self, x, eps):
     if np.random.random() < eps:
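With `outputs=cost` added to the compiled `theano.function`, calling `train_op` now evaluates and returns the cost in the same pass that applies the parameter updates, and `update` passes that value through to the caller. A hedged usage sketch (the `losses` list and `minibatches` iterable are illustrative, not from this commit):

losses = []
for states, actions, targets in minibatches:  # illustrative minibatch source
  # update() now returns the scalar cost, so the loss curve can be tracked
  loss = model.update(states, actions, targets)
  losses.append(float(loss))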
@@ -434,13 +435,11 @@ def smooth(x):
     K=K,
     conv_layer_sizes=conv_layer_sizes,
     hidden_layer_sizes=hidden_layer_sizes,
-    gamma=gamma,
   )
   target_model = DQN(
     K=K,
     conv_layer_sizes=conv_layer_sizes,
     hidden_layer_sizes=hidden_layer_sizes,
-    gamma=gamma,
   )
 
 
@@ -451,6 +450,7 @@ def smooth(x):
 
     action = np.random.choice(K)
    obs, reward, done, _ = env.step(action)
+    obs_small = downsample_image(obs)
     experience_replay_buffer.add_experience(action, obs_small, reward, done)
 
     if done:
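The added `obs_small = downsample_image(obs)` line matters: in the random-play loop that pre-fills the replay buffer, `add_experience` consumes `obs_small`, which was never assigned before this change. For reference, a `downsample_image` for raw 210x160 RGB Atari frames typically looks like the sketch below; the exact crop rows and scale factor are assumptions, not confirmed by this diff:

import numpy as np

def downsample_image(A):
  B = A[31:195]        # crop to the playing field (rows are an assumption)
  B = B.mean(axis=2)   # RGB -> grayscale
  B = B[::2, ::2]      # subsample by 2 in each dimension
  return B.astype(np.uint8)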
