
Commit 9e9fa57

add experiments for part 1 q3

1 parent a9db82f

File tree

7 files changed: 666 additions, 1 deletion


berkeley/hw3/answers/q3.sh

Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
# q2
python run_dqn_atari.py

# q3
python run_dqn_atari.1.py
python run_dqn_atari.2.py
python run_dqn_atari.3.py
python run_dqn_atari.4.py
python run_dqn_atari.5.py
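
Note: the numbered run_dqn_atari.*.py scripts are the q3 experiments driven by this shell script. Only the diffs for run_dqn_atari.1.py and run_dqn_atari.2.py appear below; as those two listings show, those two variants differ only in the depth of the fully-connected "action_value" head of the Q-network.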

berkeley/hw3/run_dqn_atari.1.py

Lines changed: 131 additions & 0 deletions

@@ -0,0 +1,131 @@
import argparse
import gym
from gym import wrappers
import os.path as osp
import random
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers

import dqn
from dqn_utils import *
from atari_wrappers import *


def atari_model(img_in, num_actions, scope, reuse=False):
    # as described in https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # original architecture
            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
        out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            out = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
            out = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
            out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)

        return out

def atari_learn(env,
                session,
                num_timesteps):
    # This is just a rough estimate
    num_iterations = float(num_timesteps) / 4.0

    lr_multiplier = 1.0
    lr_schedule = PiecewiseSchedule([
            (0, 1e-4 * lr_multiplier),
            (num_iterations / 10, 1e-4 * lr_multiplier),
            (num_iterations / 2, 5e-5 * lr_multiplier),
        ],
        outside_value=5e-5 * lr_multiplier)
    optimizer = dqn.OptimizerSpec(
        constructor=tf.train.AdamOptimizer,
        kwargs=dict(epsilon=1e-4),
        lr_schedule=lr_schedule
    )

    def stopping_criterion(env, t):
        # notice that here t is the number of steps of the wrapped env,
        # which is different from the number of steps in the underlying env
        return get_wrapper_by_name(env, "Monitor").get_total_steps() >= num_timesteps

    exploration_schedule = PiecewiseSchedule(
        [
            (0, 1.0),
            (1e6, 0.1),
            (num_iterations / 2, 0.01),
        ], outside_value=0.01
    )

    dqn.learn(
        env=env,
        q_func=atari_model,
        optimizer_spec=optimizer,
        session=session,
        exploration=exploration_schedule,
        stopping_criterion=stopping_criterion,
        replay_buffer_size=1000000,
        batch_size=32,
        gamma=0.99,
        learning_starts=50000,
        learning_freq=4,
        frame_history_len=4,
        target_update_freq=10000,
        grad_norm_clipping=10,
        double_q=True
    )
    env.close()

def get_available_gpus():
    from tensorflow.python.client import device_lib
    local_device_protos = device_lib.list_local_devices()
    return [x.physical_device_desc for x in local_device_protos if x.device_type == 'GPU']

def set_global_seeds(i):
    try:
        import tensorflow as tf
    except ImportError:
        pass
    else:
        tf.set_random_seed(i)
    np.random.seed(i)
    random.seed(i)

def get_session():
    tf.reset_default_graph()
    tf_config = tf.ConfigProto(
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1)
    session = tf.Session(config=tf_config)
    print("AVAILABLE GPUS: ", get_available_gpus())
    return session

def get_env(task, seed):
    env = gym.make('PongNoFrameskip-v4')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind(env)

    return env

def main():
    # Get Atari games.
    task = gym.make('PongNoFrameskip-v4')

    # Run training
    seed = random.randint(0, 9999)
    print('random seed = %d' % seed)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=8e6)

if __name__ == "__main__":
    main()
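
Note: with num_timesteps=8e6 the script sets num_iterations = 2e6, so the learning-rate breakpoints above fall at iterations 2e5 and 1e6, and the final exploration breakpoint (num_iterations / 2) also lands at 1e6. A self-contained sketch for evaluating such a schedule is below; the piecewise_value helper is hypothetical (it is not the PiecewiseSchedule class from dqn_utils) and assumes linear interpolation between breakpoints with a constant value outside them.

# Hedged sketch (assumed helper, not part of the commit): evaluate a
# piecewise-linear schedule at time t, mirroring the endpoints used above.
def piecewise_value(t, endpoints, outside_value):
    for (l, vl), (r, vr) in zip(endpoints[:-1], endpoints[1:]):
        if l <= t < r:
            alpha = (t - l) / (r - l)
            return vl + alpha * (vr - vl)
    return outside_value

num_iterations = 8e6 / 4.0  # 2e6, matching atari_learn with num_timesteps=8e6
lr_endpoints = [(0, 1e-4), (num_iterations / 10, 1e-4), (num_iterations / 2, 5e-5)]
eps_endpoints = [(0, 1.0), (1e6, 0.1), (num_iterations / 2, 0.01)]

print(piecewise_value(5e5, lr_endpoints, outside_value=5e-5))   # learning rate partway through the anneal
print(piecewise_value(5e5, eps_endpoints, outside_value=0.01))  # exploration epsilon after 5e5 steps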

berkeley/hw3/run_dqn_atari.2.py

Lines changed: 132 additions & 0 deletions

@@ -0,0 +1,132 @@
import argparse
import gym
from gym import wrappers
import os.path as osp
import random
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers

import dqn
from dqn_utils import *
from atari_wrappers import *


def atari_model(img_in, num_actions, scope, reuse=False):
    # as described in https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # original architecture
            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
        out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            out = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
            out = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
            out = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
            out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)

        return out

def atari_learn(env,
                session,
                num_timesteps):
    # This is just a rough estimate
    num_iterations = float(num_timesteps) / 4.0

    lr_multiplier = 1.0
    lr_schedule = PiecewiseSchedule([
            (0, 1e-4 * lr_multiplier),
            (num_iterations / 10, 1e-4 * lr_multiplier),
            (num_iterations / 2, 5e-5 * lr_multiplier),
        ],
        outside_value=5e-5 * lr_multiplier)
    optimizer = dqn.OptimizerSpec(
        constructor=tf.train.AdamOptimizer,
        kwargs=dict(epsilon=1e-4),
        lr_schedule=lr_schedule
    )

    def stopping_criterion(env, t):
        # notice that here t is the number of steps of the wrapped env,
        # which is different from the number of steps in the underlying env
        return get_wrapper_by_name(env, "Monitor").get_total_steps() >= num_timesteps

    exploration_schedule = PiecewiseSchedule(
        [
            (0, 1.0),
            (1e6, 0.1),
            (num_iterations / 2, 0.01),
        ], outside_value=0.01
    )

    dqn.learn(
        env=env,
        q_func=atari_model,
        optimizer_spec=optimizer,
        session=session,
        exploration=exploration_schedule,
        stopping_criterion=stopping_criterion,
        replay_buffer_size=1000000,
        batch_size=32,
        gamma=0.99,
        learning_starts=50000,
        learning_freq=4,
        frame_history_len=4,
        target_update_freq=10000,
        grad_norm_clipping=10,
        double_q=True
    )
    env.close()

def get_available_gpus():
    from tensorflow.python.client import device_lib
    local_device_protos = device_lib.list_local_devices()
    return [x.physical_device_desc for x in local_device_protos if x.device_type == 'GPU']

def set_global_seeds(i):
    try:
        import tensorflow as tf
    except ImportError:
        pass
    else:
        tf.set_random_seed(i)
    np.random.seed(i)
    random.seed(i)

def get_session():
    tf.reset_default_graph()
    tf_config = tf.ConfigProto(
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1)
    session = tf.Session(config=tf_config)
    print("AVAILABLE GPUS: ", get_available_gpus())
    return session

def get_env(task, seed):
    env = gym.make('PongNoFrameskip-v4')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind(env)

    return env

def main():
    # Get Atari games.
    task = gym.make('PongNoFrameskip-v4')

    # Run training
    seed = random.randint(0, 9999)
    print('random seed = %d' % seed)
    env = get_env(task, seed)
    session = get_session()
    atari_learn(env, session, num_timesteps=8e6)

if __name__ == "__main__":
    main()
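
Note: the only difference between run_dqn_atari.1.py and run_dqn_atari.2.py is the extra 512-unit fully_connected layer in the "action_value" scope above. A minimal sketch with that depth factored out into a parameter is below; build_action_value_head and its n_hidden argument are illustrative only and are not part of the committed files.

import tensorflow as tf
import tensorflow.contrib.layers as layers

def build_action_value_head(features, num_actions, n_hidden):
    # Illustrative helper (not in the commit): n_hidden=2 reproduces the head in
    # run_dqn_atari.1.py and n_hidden=3 the head in run_dqn_atari.2.py.
    with tf.variable_scope("action_value"):
        out = features
        for _ in range(n_hidden):
            out = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
        return layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)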
