-
Notifications
You must be signed in to change notification settings - Fork 2
/
experiment_weighted_or.py
83 lines (67 loc) · 2.95 KB
/
experiment_weighted_or.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
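"""
Purple circle vs beige square as a function of weights.

Composes the beige and purple-circle DQNs with a range of weights on the
beige model and tallies, per weight, how many episodes end with a beige
square collected versus a purple circle.
"""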
import json
import os

import gym
import numpy as np
import torch
from gym.wrappers.monitor import Monitor

from dqn import ComposedDQN, FloatTensor, get_action
from gym_repoman.envs import CollectEnv
from trainer import load
from wrappers import WarpFrame
class MaxLength(gym.Wrapper):
    """Environment wrapper that ends an episode after a fixed number of steps.

    The wrapper counts calls to ``step`` since the last ``reset`` and forces
    ``done`` to ``True`` once that count reaches ``length``.
    """

    def __init__(self, env, length):
        """Wrap ``env`` and cap every episode at ``length`` steps."""
        super().__init__(env)
        self.max_length = length
        self.steps = 0

    def reset(self, **kwargs):
        """Zero the step counter and reset the wrapped environment.

        Any keyword arguments are forwarded unchanged to the wrapped
        environment's ``reset``; its return value is returned as-is.
        """
        self.steps = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        """Step the wrapped environment, forcing ``done`` at the step limit.

        Returns the wrapped environment's ``(ob, reward, done, info)`` tuple
        with ``done`` overridden to ``True`` once the limit is reached.
        """
        ob, reward, done, info = self.env.step(action)
        self.steps += 1
        # ``>=`` rather than ``==``: if a caller keeps stepping after the
        # limit without calling reset(), the episode must still be reported
        # as finished instead of silently becoming unbounded again.
        if self.steps >= self.max_length:
            done = True
        return ob, reward, done, info
def classify_collected(collected):
    """Report which of the two target objects appears in ``collected``.

    Args:
        collected: iterable of objects exposing ``colour`` and ``shape``
            attributes (the script passes ``info['collected']``).

    Returns:
        ``0`` if a beige square is present, ``1`` if no beige square but a
        purple circle is present, and ``None`` if neither is present. The
        beige square takes precedence when both are present.
    """
    items = list(collected)
    if any(c.colour == 'beige' and c.shape == 'square' for c in items):
        return 0
    if any(c.colour == 'purple' and c.shape == 'circle' for c in items):
        return 1
    return None


def main():
    """Run the weight sweep and dump the per-weight tallies to JSON.

    For every iteration and every weight, a composed DQN is evaluated for
    ``max_episodes`` episodes and a ``[beige_square, purple_circle]`` count
    is appended to that weight's entry in the tally.
    """
    max_iterations = 80
    max_episodes = 100
    max_trajectory = 50
    tally_path = './experiment_weighted_or_more/tally.json'

    task = MaxLength(
        WarpFrame(CollectEnv(
            goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
            or (x.colour == 'purple' and x.shape == 'circle'))),
        max_trajectory)
    env = Monitor(task, './experiment_weighted_or/', video_callable=False, force=True)

    dqn_purple_circle = load('./models/purple_circle/model.dqn', task)  # entropy regularised functions
    dqn_beige_crate = load('./models/beige_crate/model.dqn', task)  # entropy regularised functions

    weights = np.arange(1/3, 3.01, 0.05)
    tally = {i: [] for i in range(len(weights))}

    # Create the output directory before the long-running sweep so a missing
    # directory cannot discard the results at the very end.
    os.makedirs(os.path.dirname(tally_path), exist_ok=True)

    for _iteration in range(max_iterations):
        for i, weight in enumerate(weights):
            collected_count = [0, 0]
            # Use the swept weight (previously overwritten with 1, which made
            # every configuration identical). Cast to a plain float so a
            # NumPy scalar is not handed to the composed model.
            dqn_composed = ComposedDQN([dqn_beige_crate, dqn_purple_circle], [float(weight), 1])
            for episode in range(max_episodes):
                if episode % 1000 == 0:
                    print(episode)
                obs = env.reset()
                for _ in range(max_trajectory):
                    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                    action = get_action(dqn_composed, obs)
                    obs, reward, done, info = env.step(action)
                    if done:
                        outcome = classify_collected(info['collected'])
                        if outcome is None:
                            print("Missed")
                        else:
                            collected_count[outcome] += 1
                        break
            tally[i].append(collected_count)
            print(tally[i])

    print(tally)
    with open(tally_path, 'w') as fp:
        json.dump(tally, fp)


if __name__ == '__main__':
    main()