
Commit 73d71e8
first test on self paced
1 parent 14c0d1e

File tree: 5 files changed, +35 -31 lines

One of the changed files is binary; its diff is not shown.

IRL/GradientIRL/gradientIRL.py

Lines changed: 3 additions & 9 deletions
@@ -66,16 +66,10 @@ def print_jacobian(self):
     def loss2(self, alpha,M):
         return np.dot(alpha, np.dot(M, alpha))
 
-    def loss(self,trajs):
+    def loss(self,w,Ms):
         losses = []
-        for traj in trajs:
-            g = self.expert_policy.grad_log(traj)
-            temp = np.zeros([len(self.expert_policy.get_theta()), len(self.reward.params)])
-            for idx in range(len(self.reward.params)):
-                temp[:,idx] = self.reward.basis_traj(traj, idx) * np.ones(len(temp))
-            jacobian = (g*temp.T).T
-            M = np.dot(jacobian.T, jacobian)
-            losses.append(self.loss2(self.reward.params,M))
+        for M in Ms:
+            losses.append(self.loss2(w,M))
         return np.asarray(losses)
 
 

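The refactor above splits the GIRL loss into two stages: the per-trajectory Gram matrices M = Jᵀ J are now built once by the caller, and loss(w, Ms) only evaluates the cheap quadratic form wᵀ M w for each of them. A minimal, self-contained sketch of that split, using dummy Jacobians in place of the repo's grad_log / basis_traj machinery (which is not reproduced here):

import numpy as np

def loss2(w, M):
    # quadratic form w^T M w for a single trajectory
    return np.dot(w, np.dot(M, w))

def loss(w, Ms):
    # one loss per precomputed Gram matrix; nothing trajectory-specific is recomputed
    return np.asarray([loss2(w, M) for M in Ms])

rng = np.random.default_rng(0)
# dummy Jacobians (5 policy params x 4 reward params) standing in for grad_log/basis_traj
jacobians = [rng.normal(size=(5, 4)) for _ in range(3)]
Ms = [J.T @ J for J in jacobians]   # computed once, outside the optimiser loop
w = np.ones(4) / 4                  # uniform reward weights, as in the reward.py change
print(loss(w, Ms))                  # three non-negative per-trajectory losses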
IRL/GradientIRL/main.py

Lines changed: 6 additions & 10 deletions
@@ -107,15 +107,13 @@
 # =============================================================================
 reward.import_from_file(write_path_girl)
 
-X = 200
-V = 200
+X = 50
+V = 50
 
 
 
 x = np.linspace(-1.2, 0.6, X)
 v = np.linspace(-0.07, 0.07,V)
-X = len(x)
-V = len(v)
 print(X)
 print(V)
 x, v = np.meshgrid(x, v)
@@ -132,9 +130,6 @@
 # =============================================================================
 # r[i,j] = reward.basis([xi,vj],0)
 # =============================================================================
-print(x.shape)
-print(v.shape)
-print(r.shape)
 ax.plot_surface(x, v, r.T, cmap=cm.coolwarm,
                 linewidth=0, antialiased=False)
 
@@ -144,8 +139,9 @@
 f_sp = irl.GIRL(reward_sp, policy)
 K0=1
 eps=1 #not working for now
+mu=0.5
 
-girl_self_paced = Self_Paced(f_sp,K0,eps,data)
+girl_self_paced = Self_Paced(f_sp,K0,eps,mu)
 trajs = girl_self_paced.import_data(data)
 alphass = girl_self_paced.fit(trajs)
 
@@ -154,15 +150,15 @@
 
 #plot(alphas)
 
+print(alphass)
+
 reward_sp.set_params(alphass[-1])
 
 reward_sp.export_to_file(write_path_self_paced)
 #reward.import_from_file(write_path)
 
 x = np.linspace(-1.2, 0.6, X)
 v = np.linspace(-0.07, 0.07, V)
-X = len(x)
-V = len(v)
 print(X)
 print(V)
 x, v = np.meshgrid(x, v)

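The main.py changes coarsen the plotting grid from 200x200 to 50x50, pass the new pace parameter mu=0.5 to Self_Paced instead of the raw data, and print the sequence of recovered weight vectors. The surface-plot part can be reproduced standalone; the sketch below uses a made-up Gaussian bump as a stand-in for the learned reward, which the real script evaluates from its basis functions:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

X, V = 50, 50                            # coarser grid than the previous 200x200
x = np.linspace(-1.2, 0.6, X)            # mountain-car position range
v = np.linspace(-0.07, 0.07, V)          # mountain-car velocity range
x, v = np.meshgrid(x, v)

# stand-in reward surface (a Gaussian bump near the goal); the real script
# fills r from the learned reward basis and plots r.T
r = np.exp(-((x - 0.5) ** 2 / 0.1 + (v ** 2) / 0.001))

fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.plot_surface(x, v, r, cmap=cm.coolwarm, linewidth=0, antialiased=False)
plt.show()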
IRL/SelfPaced.py

Lines changed: 25 additions & 11 deletions
@@ -7,6 +7,7 @@
 import numpy as np
 import scipy.optimize as opt
 from IRL import IRL
+from tqdm import tqdm
 
 #Self paced
 
@@ -18,6 +19,7 @@ def __init__(self,f,K0,eps,mu,model=None,constraint='hard'):
         self.eps=eps
         self.mu = mu
         # params correspond to the ws in the paper.
+        self.model = model
         if self.model is None:
             self.params = self.zero()
         else:
@@ -43,10 +45,12 @@ def fit(self,trajs):
         ws = []
 
         loss = []
-        while((self.v == np.ones(len(trajs))).all()): #find a termination condition perhaps double while (alternative search, and then decrement)
-
+        while(not (self.v == np.ones(len(trajs))).all()): #find a termination condition perhaps double while (alternative search, and then decrement)
+            print('hey')
             #Alternative search strategy
             while(start == True or not((old_v == self.v).all())):
+                print('ho')
+                start=False
                 #minimising for v
 # =============================================================================
 #                 result_v = opt.minimize(self.objective_v, v0, constraints=self.v_constraints)
@@ -55,35 +59,45 @@
 #                 print(result_v)
 #                 self.v = result_v.x
 # =============================================================================
-
-                losses = self.f.loss(trajs)
-                print(losses)
+                Ms = []
+                for traj in tqdm(trajs):
+                    g = self.f.expert_policy.grad_log(traj)
+                    temp = np.zeros([len(self.f.expert_policy.get_theta()), len(self.f.reward.params)])
+                    for idx in range(len(self.f.reward.params)):
+                        temp[:,idx] = self.f.reward.basis_traj(traj, idx) * np.ones(len(temp))
+                    jacobian = (g*temp.T).T
+                    Ms.append(np.dot(jacobian.T, jacobian))
+
+                losses = self.f.loss(self.f.reward.params,Ms)
+                print(np.sum(self.v))
                 #second method use dirac
                 old_v=self.v
                 self.v = np.where(losses < 1/self.K,1,0)
 
                 #minimising for w
-                result_w = opt.minimize(self.objective_w, self.w)
+                print('minimise W')
+
+                result_w = opt.minimize(self.objective_w, self.w,args=(Ms,))
                 if not result_w.success:
                     print(result_w.message)
                     print(result_w)
                 self.w = result_w.x
                 self.f.reward.set_params(self.w)
 
-                ws.append(self.w)
+            ws.append(self.w)
             self.K=self.mu * self.K
 
         return ws
 
-    def objective_w(self,w,trajs):
-        return(np.dot(self.v, self.f.loss(w,trajs))+self.reg(w) ) #the rest is independent of w, so there is no need to compute it
+    def objective_w(self,w,Ms):
+        return(np.dot(self.v, self.f.loss(w,Ms))+self.reg(w) ) #the rest is independent of w, so there is no need to compute it
 
     def objective_v(self,v,trajs):
-        return(np.dot(v,self.f.loss(trajs)) - np.sum(v)/self.K) #think about a way to only calculate objective if v is 1
+        return(np.dot(v,self.f.loss(w,trajs)) - np.sum(v)/self.K) #think about a way to only calculate objective if v is 1
 
     def objective(self,inputs,trajs):
         w,v = inputs
-        return(self.reg(w) + v*self.f.objective(self.w) - np.sum(v)/self.K)
+        return(self.reg(w) + np.dot(v,self.f.loss(self.w,trajs)) - np.sum(v)/self.K)
 
 
 

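The fit() loop above alternates between picking the selection weights v by thresholding the per-trajectory losses at 1/K, refitting the reward weights w on the selected trajectories, and decaying K by mu so that harder trajectories get admitted over time; it stops once every trajectory is selected. A rough, self-contained sketch of that schedule with hard selection weights follows. Here losses_fn, reg and the dummy Gram matrices are stand-ins for the repo's self.f.loss, self.reg and the matrices built from grad_log, and the alternating-search inner loop is collapsed into a single update per value of K:

import numpy as np
import scipy.optimize as opt

def losses_fn(w, Ms):
    # per-trajectory quadratic losses w^T M w (stand-in for self.f.loss)
    return np.asarray([w @ M @ w for M in Ms])

def reg(w):
    # placeholder regulariser (stand-in for self.reg)
    return 0.1 * np.sum(w ** 2)

def self_paced_fit(Ms, w0, K0=1.0, mu=0.5):
    w = np.asarray(w0, dtype=float)
    K = K0
    v = np.zeros(len(Ms))
    ws = []
    while not np.all(v == 1.0):
        # hard selection: keep the trajectories whose current loss is below 1/K
        v = np.where(losses_fn(w, Ms) < 1.0 / K, 1.0, 0.0)
        # refit w on the selected trajectories only (analogue of objective_w)
        result = opt.minimize(lambda w_: v @ losses_fn(w_, Ms) + reg(w_), w)
        w = result.x
        ws.append(w)
        K = mu * K          # decay the pace parameter so harder samples get admitted
    return ws

rng = np.random.default_rng(1)
Js = [rng.normal(size=(5, 4)) for _ in range(6)]   # dummy per-trajectory Jacobians
Ms = [J.T @ J for J in Js]
ws = self_paced_fit(Ms, w0=np.ones(4) / 4)
print(len(ws), ws[-1])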
utils/reward.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ def __init__(self, dx, dv, env):
         # tune sigma according to the discretization
         self.sigma_inv = inv(np.array([[0.5*(self.lx/self.dx)**2, 0. ],
                                        [0., 0.5*(self.lv/self.dv)**2]]))
-        self.params = np.zeros(dx * dv)
+        self.params = np.ones(dx * dv)/(dx*dv)
 
     def value(self, state, action):
         r = 0.

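A plausible reading of the reward.py change (not stated in the commit): with the quadratic GIRL loss wᵀ M w, the all-zeros parameter vector is already a trivial global minimum, so initialising params to zeros gives the optimiser nothing to improve; a uniform vector summing to one is a non-degenerate starting point. A tiny sketch with a stand-in Gram matrix:

import numpy as np

dx, dv = 5, 5
M = np.eye(dx * dv)                    # stand-in Gram matrix (hypothetical)

w_zero = np.zeros(dx * dv)             # old initialisation
w_unif = np.ones(dx * dv) / (dx * dv)  # new initialisation

print(w_zero @ M @ w_zero)             # 0.0: already a global minimum of w^T M w
print(w_unif @ M @ w_unif)             # > 0, so the optimiser has a gradient to follow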