Commit eee7488

changed main and reward

1 parent c24304a · commit eee7488

4 files changed: +51 -34 lines

IRL/GradientIRL/main.py

Lines changed: 36 additions & 29 deletions
@@ -41,7 +41,7 @@


 policy.set_theta(np.array([-18, -1, 18]))
-#policy.episode(render=True)
+policy.episode(render=True)

 #policy.episode(render=True)
 for i in range(10):
@@ -52,7 +52,7 @@
 print('solving the IRL problem:')

 dx = 10
-dv = 5
+dv = 10

 reward = rew.Reward(dx, dv, env)

@@ -67,7 +67,6 @@ def plot(p):
         ax.scatter(i, j, p[dv*i+j], c='r')
     plt.show()

-plot(reward.params)

 '''
 print('')
@@ -82,20 +81,8 @@ def plot(p):
 '''

 girl = irl.GIRL(reward, data, policy)
-girl.compute_jacobian()
-#print(girl.jacobian)
-alphas = girl.solve()

-plt.plot(alphas)
-#plt.show()
-
-#plot(alphas)

-#reward.set_params(alphas)
-reward.export_to_file(write_path)
-#reward.plot()
-
-reward.import_from_file(write_path)

 x = np.arange(-1.2, 0.6, 0.1)
 print(x)
@@ -108,10 +95,41 @@ def plot(p):

 r = np.zeros([X, V])

-plt.plot(reward.params)
+#reward.plot()
+
+
+'''
+'''
+'''
+fig = plt.figure()
+ax = fig.add_subplot(111, projection='3d')
+for i in range(X):
+    for j in range(V):
+        xi = i / (X-1) * 1.8 - 0.6
+        vj = j / (V-1) * 0.14 - 0.07
+        ax.scatter(i, j, reward.value([xi, vj], 1), c='r')
 plt.show()
+'''
+
+# LOAD FROM FILE
+'''
+reward.import_from_file(write_path)
+'''
+
+# SOLVE IRL
+
+girl.compute_jacobian()
+alphas = girl.solve()
+
+plt.plot(alphas)
+plt.show()
+
+plot(alphas)
+
+reward.set_params(alphas)
+#reward.export_to_file(write_path)
+reward.plot()

-#reward.plot()

 fig = plt.figure()
 ax = fig.gca(projection='3d')
@@ -128,18 +146,7 @@ def plot(p):
                        linewidth=0, antialiased=False)

 plt.show()
-'''
-'''
-'''
-fig = plt.figure()
-ax = fig.add_subplot(111, projection='3d')
-for i in range(X):
-    for j in range(V):
-        xi = i / (X-1) * 1.8 - 0.6
-        vj = j / (V-1) * 0.14 - 0.07
-        ax.scatter(i, j, reward.value([xi, vj], 1), c='r')
-plt.show()
-'''
+

 env.close()
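The reworked main.py now runs the pipeline in order: load or inspect the reward, then compute the Jacobian and solve for the feature weights alphas. As a reference for what the girl.compute_jacobian() / girl.solve() pair typically amounts to in gradient-based IRL (GIRL), here is a minimal sketch under stated assumptions; solve_girl and its SVD formulation are illustrative, not this repo's actual irl.GIRL implementation:

import numpy as np

def solve_girl(jacobian):
    # Hypothetical stand-in for girl.solve(). Each column of the Jacobian
    # holds the policy gradient induced by one reward feature; GIRL looks
    # for feature weights alpha under which the demonstrated policy is a
    # stationary point, i.e. alpha minimizing ||J @ alpha|| subject to
    # ||alpha|| = 1. That minimizer is the right singular vector of J
    # with the smallest singular value.
    _, _, vt = np.linalg.svd(jacobian)
    alpha = vt[-1]
    if alpha.sum() < 0:   # flip sign for readability; only the direction matters
        alpha = -alpha
    return alpha

# Toy usage: 3 policy parameters (as in policy.set_theta above) and
# 50 reward features (one per grid cell of the reward basis).
J = np.random.randn(3, 50)
alphas = solve_girl(J)
print(alphas.shape)   # (50,)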

IRL/GradientIRL/reward_params.txt

Lines changed: 13 additions & 3 deletions
@@ -1,3 +1,13 @@
-array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
-       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
-       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
+array([ 3.20429979e-02,  1.13662029e-02, -6.84247028e-03,  1.47535600e-03,
+        3.86987504e-02,  2.14374321e-02,  1.57548598e-03,  1.05301856e-03,
+       -1.51573842e-02,  3.45703982e-02,  1.32795741e-02,  7.92573341e-03,
+        3.51774285e-03, -6.63390638e-03,  2.41702979e-02,  1.21157862e-02,
+        9.02378858e-03,  2.58939558e-02,  9.38206011e-05,  1.32384782e-02,
+        1.88292298e-02,  2.70662152e-03,  3.44741147e-02, -1.78338057e-03,
+        1.02558879e-02,  2.93105403e-02, -7.47423504e-03,  2.12172532e-02,
+       -1.01983640e-02,  1.62862682e-02,  3.65679711e-02, -1.85521598e-02,
+       -2.27282088e-02, -5.14086811e-03,  2.63669257e-02,  3.89854969e-02,
+        1.17925597e-02, -2.72827510e-02,  6.35790925e-03,  3.40425363e-02,
+        3.93589185e-02,  3.31425044e-02,  3.26526118e-02,  4.23271807e-03,
+        3.74840496e-02,  3.93833440e-02,  3.88174200e-02,  4.93203196e-02,
+        2.62631597e-02,  3.88790920e-02])
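reward_params.txt stores the learned weight vector as a plain numpy repr, which reward.import_from_file presumably parses back into an array. A minimal sketch of such a parser, assuming only the format visible above (the repo's actual reader may differ):

import numpy as np

def load_params(path):
    # Parse a file containing a numpy repr like "array([ 3.20e-02, ...])".
    text = open(path).read()
    body = text[text.index('[') + 1 : text.rindex(']')]
    return np.array([float(tok) for tok in body.split(',')])

params = load_params('reward_params.txt')
print(params.shape)   # (50,), consistent with a 10 x 5 grid, i.e. the dv = 5 value this commit replaces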
28 Bytes · Binary file not shown

utils/reward.py

Lines changed: 2 additions & 2 deletions
@@ -21,8 +21,8 @@ def __init__(self, dx, dv, env):
         self.zx = - sp.low[0]  # zero of the position interval
         self.zv = - sp.low[1]  # zero of the velocity interval
         # tune sigma according to the discretization
-        self.sigma_inv = inv(np.array([[.05, 0. ],
-                                       [0., .0003]]))
+        self.sigma_inv = inv(np.array([[0.5*(self.lx/self.dx)**2, 0. ],
+                                       [0., 0.5*(self.lv/self.dv)**2]]))
         self.params = np.zeros(dx * dv)

     def value(self, state, action):
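The new sigma_inv ties each Gaussian basis function's width to the grid cell size (lx/dx by lv/dv) instead of hard-coded constants, so each bump keeps a standard deviation of about 0.7 cell widths (sqrt(0.5) of a cell) and a constant overlap with its neighbours whatever discretization is chosen. A minimal sketch of the kind of Gaussian-RBF reward this sets up; the cell-center layout and the names lx, lv, low are assumptions, while the sigma_inv expression and the params[dv*i+j] indexing come from the diffs above:

import numpy as np
from numpy.linalg import inv

def rbf_reward(state, params, dx, dv, lx, lv, low):
    # Reward as a weighted sum of Gaussian bumps, one per (i, j) grid
    # cell, with widths tied to the cell size as in the diff above.
    sigma_inv = inv(np.array([[0.5 * (lx / dx) ** 2, 0.0],
                              [0.0, 0.5 * (lv / dv) ** 2]]))
    r = 0.0
    for i in range(dx):
        for j in range(dv):
            center = np.array([low[0] + (i + 0.5) * lx / dx,    # assumed cell centers
                               low[1] + (j + 0.5) * lv / dv])
            d = np.asarray(state) - center
            r += params[dv * i + j] * np.exp(-0.5 * d @ sigma_inv @ d)
    return r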
