
Commit 2058a35: recoded reward
1 parent 0f6f2e5

File tree: 5 files changed (+37, -26 lines)


Figure_1.png: deleted (-91.7 KB, binary file not shown)
Figure_2.png: deleted (-202 KB, binary file not shown)
Figure_3.png: deleted (-65.7 KB, binary file not shown)

IRL/GradientIRL/main.py

Lines changed: 14 additions & 16 deletions
```diff
@@ -114,23 +114,21 @@ def plot_reward(reward,title):
     plt.show()
 '''
 
+girl = irl.GIRL(reward, policy)
+trajs = girl.import_data(data)
+#girl.compute_jacobian()
+#print(girl.jacobian)
+alphas = girl.solve(trajs)
+
+# plt.plot(alphas)
 # =============================================================================
-# girl = irl.GIRL(reward, policy)
-# trajs = girl.import_data(data)
-# #girl.compute_jacobian()
-# #print(girl.jacobian)
-# alphas = girl.solve(trajs)
-#
-# # plt.plot(alphas)
-# # =============================================================================
-# #plt.show()
-#
-# #plot(alphas)
+#plt.show()
+
+#plot(alphas)
+
+reward.set_params(alphas)
 #
-# reward.set_params(alphas)
-# #
-# reward.export_to_file(write_path_girl)
-# =============================================================================
+reward.export_to_file(write_path_girl)
 
 reward.import_from_file(write_path_girl)
 
```
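The uncommented block runs the full GIRL pipeline: import the demonstrations, solve for the reward weights `alphas`, install them with `reward.set_params(alphas)`, and export the result. For context, GIRL (Gradient-based Inverse Reinforcement Learning) picks the reward-feature weights under which the demonstrated policy is a stationary point of the expected return: it estimates the Jacobian of the policy gradient with respect to each reward parameter and minimises the norm of the resulting combined gradient over normalised weights. Below is a minimal sketch of what `girl.solve(trajs)` plausibly computes once that Jacobian is estimated; `solve_girl` and its signature are illustrative, not taken from this repository.

```python
import numpy as np
from scipy.optimize import minimize

def solve_girl(jacobian):
    """Sketch: weights alpha minimising ||J @ alpha||^2 with the weights summing to 1.

    jacobian: (n_policy_params, n_reward_features) estimate of the policy
    gradient's sensitivity to each reward basis function.
    """
    n = jacobian.shape[1]
    gram = jacobian.T @ jacobian                 # J^T J, so a @ gram @ a = ||J a||^2

    objective = lambda a: a @ gram @ a
    simplex = ({"type": "eq", "fun": lambda a: np.sum(a) - 1.0},)
    bounds = [(0.0, 1.0)] * n                    # keep the weights non-negative
    a0 = np.ones(n) / n                          # uniform starting point
    res = minimize(objective, a0, bounds=bounds, constraints=simplex)
    return res.x
```

Restricting the weights to be non-negative turns the L1 normalisation (the same one the reward constructor applies with `np.linalg.norm(self.params, 1)`) into a plain sum-to-one constraint, which SciPy's SLSQP handles directly.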
```diff
@@ -140,7 +138,7 @@ def plot_reward(reward,title):
 f_sp = irl.GIRL(reward_sp, policy)
 K0=10e4
 eps=10e-15 #not working for now
-mu=0.5
+mu=0.5
 
 girl_self_paced = Self_Paced(f_sp,K0,eps,mu)
 trajs = girl_self_paced.import_data(data)
```
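The second hunk only touches the self-paced setup (the `mu=0.5` change appears to be whitespace-only). `Self_Paced` is configured with an initial pace `K0 = 10e4` (i.e. 1e5), a convergence tolerance `eps = 10e-15` that the inline comment flags as not working, and an annealing factor `mu = 0.5`. In standard self-paced learning, the solver alternates between fitting on the trajectories it currently finds "easy" (loss below a threshold governed by K) and relaxing that threshold by the factor mu so harder trajectories are admitted over time. The `Self_Paced` internals are not part of this commit, so the loop below is only a sketch of that scheme, with hypothetical `losses_fn` / `fit_fn` callables standing in for what the class would call.

```python
import numpy as np

def self_paced_solve(losses_fn, fit_fn, K0=10e4, eps=10e-15, mu=0.5, max_iter=50):
    """Sketch of a self-paced loop: fit on the easy samples, then anneal K by mu.

    losses_fn(params) -> per-trajectory losses; fit_fn(mask) -> refitted params.
    Both are hypothetical stand-ins, not this repository's API.
    """
    K = K0                                   # note: 10e4 == 1e5
    params = fit_fn(mask=None)               # warm start on all trajectories
    for _ in range(max_iter):
        losses = losses_fn(params)
        easy = losses < 1.0 / K              # admit only the currently easy samples
        new_params = fit_fn(mask=easy)
        if np.linalg.norm(new_params - params) < eps:
            break                            # pace schedule has converged
        params = new_params
        K *= mu                              # mu < 1 shrinks K, widening the 1/K threshold
    return params
```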

utils/reward.py

Lines changed: 23 additions & 10 deletions
```diff
@@ -25,25 +25,38 @@ def __init__(self, dx, dv, env):
                                        [0., 0.5*(self.lv/self.dv)**2]]))
         self.params = np.ones(dx * dv)
         self.params /=np.linalg.norm(self.params,1)
+
+        self.centers = np.zeros((dx*dv,2))
+        self.fill_centers()
 # =============================================================================
 #         self.params = np.zeros(dx*dv)
 # =============================================================================
         self.env = env
+
+    def fill_centers(self):
+        for i in range(self.dx):
+            self.centers[i*self.dv:(i+1)*self.dv,0] += i / (self.dx-1) * self.lx - self.zx
+        for j in range(self.dv):
+            self.centers[j::self.dv,1] += j / (self.dv-1) * self.lv - self.zv
 
     def value(self, state, action):
-        r = 0.
-        for idx in range(self.dx*self.dv):
-            r += self.params[idx] * self.basis(state, idx)
+# =============================================================================
+#         r = 0.
+#         for idx in range(self.dx*self.dv):
+#             r += self.params[idx] * self.basis(state, idx)
+# =============================================================================
+        r=np.dot(self.params,self.basis2(state))
         return r
 
+    def basis2(self,state):
+        state_normalized = state - self.centers
+
+        result = np.einsum('ij,ij->i', np.dot(state_normalized, self.sigma_inv), state_normalized)
+        return np.exp(-result/2)
+
     def basis(self, state, idx):
-        j = idx % self.dv
-        i = (idx-j)//self.dv
-        x, v = state
-        xi = i / (self.dx-1) * self.lx - self.zx
-        vj = j / (self.dv-1) * self.lv - self.zv
-        s = np.array([x, v])
-        si = np.array([xi, vj])
+        s = state
+        si = self.centers[idx]
         return np.exp(-np.dot((s - si), np.dot(self.sigma_inv, (s - si))))
 
     def partial_value(self, state, action, idx):
```
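This is the "recoded reward" of the commit message: the per-basis Python loop in `value` is replaced by one vectorised evaluation. `fill_centers` lays the dx*dv Gaussian centres out on a regular (x, v) lattice once, and `basis2` computes every quadratic form (s - c_i)^T Sigma^{-1} (s - c_i) in a single `np.einsum`, so the reward collapses to the dot product `np.dot(self.params, self.basis2(state))`. One detail worth flagging: `basis2` returns exp(-q/2) while the retained `basis` returns exp(-q), so the two code paths agree only up to a factor of 2 in the exponent unless `sigma_inv` is meant to absorb it. Below is a standalone sketch of the vectorisation with an equivalence check, using made-up grid parameters rather than the repository's.

```python
import numpy as np

# Illustrative grid: dx * dv Gaussian RBF centres on a regular (x, v) lattice.
dx, dv = 4, 3
lx, lv, zx, zv = 2.0, 1.0, 1.0, 0.5       # spans and offsets, made up for the demo
sigma_inv = np.diag([(dx / lx) ** 2, (dv / lv) ** 2])

centers = np.zeros((dx * dv, 2))
for i in range(dx):                       # x-coordinate, constant over each v block
    centers[i * dv:(i + 1) * dv, 0] = i / (dx - 1) * lx - zx
for j in range(dv):                       # v-coordinate, strided across the x blocks
    centers[j::dv, 1] = j / (dv - 1) * lv - zv

def basis_loop(state):
    """Old style: evaluate one Gaussian at a time."""
    diffs = state - centers
    return np.array([np.exp(-d @ sigma_inv @ d / 2) for d in diffs])

def basis_vectorized(state):
    """New style: all quadratic forms in one einsum, as in basis2."""
    diffs = state - centers
    q = np.einsum('ij,ij->i', diffs @ sigma_inv, diffs)
    return np.exp(-q / 2)

state = np.array([0.3, 0.1])
assert np.allclose(basis_loop(state), basis_vectorized(state))
```

The `'ij,ij->i'` einsum contracts row by row, which is exactly the batched form of `d @ sigma_inv @ d`, and it avoids both the Python-level loop and any (n, n) intermediate product.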
