Commit eee7488

changed main and reward

1 parent c24304a · commit eee7488

4 files changed: +51 -34 lines

IRL/GradientIRL/main.py

Lines changed: 36 additions & 29 deletions
@@ -41,7 +41,7 @@


 policy.set_theta(np.array([-18, -1, 18]))
-#policy.episode(render=True)
+policy.episode(render=True)

 #policy.episode(render=True)
 for i in range(10):
@@ -52,7 +52,7 @@
 print('solving the IRL problem:')

 dx = 10
-dv = 5
+dv = 10

 reward = rew.Reward(dx, dv, env)

@@ -67,7 +67,6 @@ def plot(p):
         ax.scatter(i, j, p[dv*i+j], c='r')
     plt.show()

-plot(reward.params)

 '''
 print('')
@@ -82,20 +81,8 @@ def plot(p):
 '''

 girl = irl.GIRL(reward, data, policy)
-girl.compute_jacobian()
-#print(girl.jacobian)
-alphas = girl.solve()

-plt.plot(alphas)
-#plt.show()
-
-#plot(alphas)

-#reward.set_params(alphas)
-reward.export_to_file(write_path)
-#reward.plot()
-
-reward.import_from_file(write_path)

 x = np.arange(-1.2, 0.6, 0.1)
 print(x)
@@ -108,10 +95,41 @@ def plot(p):

 r = np.zeros([X, V])

-plt.plot(reward.params)
+#reward.plot()
+
+
+'''
+'''
+'''
+fig = plt.figure()
+ax = fig.add_subplot(111, projection='3d')
+for i in range(X):
+    for j in range(V):
+        xi = i / (X-1) * 1.8 - 0.6
+        vj = j / (V-1) * 0.14 - 0.07
+        ax.scatter(i, j, reward.value([xi, vj], 1), c='r')
 plt.show()
+'''
+
+# LOAD FROM FILE
+'''
+reward.import_from_file(write_path)
+'''
+
+# SOLVE IRL
+
+girl.compute_jacobian()
+alphas = girl.solve()
+
+plt.plot(alphas)
+plt.show()
+
+plot(alphas)
+
+reward.set_params(alphas)
+#reward.export_to_file(write_path)
+reward.plot()

-#reward.plot()

 fig = plt.figure()
 ax = fig.gca(projection='3d')
@@ -128,18 +146,7 @@ def plot(p):
                        linewidth=0, antialiased=False)

 plt.show()
-'''
-'''
-'''
-fig = plt.figure()
-ax = fig.add_subplot(111, projection='3d')
-for i in range(X):
-    for j in range(V):
-        xi = i / (X-1) * 1.8 - 0.6
-        vj = j / (V-1) * 0.14 - 0.07
-        ax.scatter(i, j, reward.value([xi, vj], 1), c='r')
-plt.show()
-'''
+

 env.close()
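The reworked main.py now runs the pipeline in order: load or inspect the reward, then compute the Jacobian and solve for the feature weights alphas. As a reference for what the girl.compute_jacobian() / girl.solve() pair typically amounts to in gradient-based IRL (GIRL), here is a minimal sketch under stated assumptions; solve_girl and its SVD formulation are illustrative, not this repo's actual irl.GIRL implementation:

import numpy as np

def solve_girl(jacobian):
    # Hypothetical stand-in for girl.solve(). Each column of the Jacobian
    # holds the policy gradient induced by one reward feature; GIRL looks
    # for feature weights alpha under which the demonstrated policy is a
    # stationary point, i.e. alpha minimizing ||J @ alpha|| subject to
    # ||alpha|| = 1. That minimizer is the right singular vector of J
    # with the smallest singular value.
    _, _, vt = np.linalg.svd(jacobian)
    alpha = vt[-1]
    if alpha.sum() < 0:   # flip sign for readability; only the direction matters
        alpha = -alpha
    return alpha

# Toy usage: 3 policy parameters (as in policy.set_theta above) and
# 50 reward features (one per grid cell of the reward basis).
J = np.random.randn(3, 50)
alphas = solve_girl(J)
print(alphas.shape)   # (50,)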

IRL/GradientIRL/reward_params.txt

Lines changed: 13 additions & 3 deletions
@@ -1,3 +1,13 @@
-array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
-       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
-       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
+array([ 3.20429979e-02,  1.13662029e-02, -6.84247028e-03,  1.47535600e-03,
+        3.86987504e-02,  2.14374321e-02,  1.57548598e-03,  1.05301856e-03,
+       -1.51573842e-02,  3.45703982e-02,  1.32795741e-02,  7.92573341e-03,
+        3.51774285e-03, -6.63390638e-03,  2.41702979e-02,  1.21157862e-02,
+        9.02378858e-03,  2.58939558e-02,  9.38206011e-05,  1.32384782e-02,
+        1.88292298e-02,  2.70662152e-03,  3.44741147e-02, -1.78338057e-03,
+        1.02558879e-02,  2.93105403e-02, -7.47423504e-03,  2.12172532e-02,
+       -1.01983640e-02,  1.62862682e-02,  3.65679711e-02, -1.85521598e-02,
+       -2.27282088e-02, -5.14086811e-03,  2.63669257e-02,  3.89854969e-02,
+        1.17925597e-02, -2.72827510e-02,  6.35790925e-03,  3.40425363e-02,
+        3.93589185e-02,  3.31425044e-02,  3.26526118e-02,  4.23271807e-03,
+        3.74840496e-02,  3.93833440e-02,  3.88174200e-02,  4.93203196e-02,
+        2.62631597e-02,  3.88790920e-02])
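reward_params.txt stores the learned weight vector as a plain numpy repr, which reward.import_from_file presumably parses back into an array. A minimal sketch of such a parser, assuming only the format visible above (the repo's actual reader may differ):

import numpy as np

def load_params(path):
    # Parse a file containing a numpy repr like "array([ 3.20e-02, ...])".
    text = open(path).read()
    body = text[text.index('[') + 1 : text.rindex(']')]
    return np.array([float(tok) for tok in body.split(',')])

params = load_params('reward_params.txt')
print(params.shape)   # (50,), consistent with a 10 x 5 grid, i.e. the dv = 5 value this commit replaces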
28 Bytes · Binary file not shown

utils/reward.py

Lines changed: 2 additions & 2 deletions
@@ -21,8 +21,8 @@ def __init__(self, dx, dv, env):
         self.zx = - sp.low[0]  # zero of the position interval
         self.zv = - sp.low[1]  # zero of the velocity interval
         # tune sigma according to the discretization
-        self.sigma_inv = inv(np.array([[.05, 0. ],
-                                       [0., .0003]]))
+        self.sigma_inv = inv(np.array([[0.5*(self.lx/self.dx)**2, 0. ],
+                                       [0., 0.5*(self.lv/self.dv)**2]]))
         self.params = np.zeros(dx * dv)

     def value(self, state, action):
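The new sigma_inv ties each Gaussian basis function's width to the grid cell size (lx/dx by lv/dv) instead of hard-coded constants, so each bump keeps a standard deviation of about 0.7 cell widths (sqrt(0.5) of a cell) and a constant overlap with its neighbours whatever discretization is chosen. A minimal sketch of the kind of Gaussian-RBF reward this sets up; the cell-center layout and the names lx, lv, low are assumptions, while the sigma_inv expression and the params[dv*i+j] indexing come from the diffs above:

import numpy as np
from numpy.linalg import inv

def rbf_reward(state, params, dx, dv, lx, lv, low):
    # Reward as a weighted sum of Gaussian bumps, one per (i, j) grid
    # cell, with widths tied to the cell size as in the diff above.
    sigma_inv = inv(np.array([[0.5 * (lx / dx) ** 2, 0.0],
                              [0.0, 0.5 * (lv / dv) ** 2]]))
    r = 0.0
    for i in range(dx):
        for j in range(dv):
            center = np.array([low[0] + (i + 0.5) * lx / dx,    # assumed cell centers
                               low[1] + (j + 0.5) * lv / dv])
            d = np.asarray(state) - center
            r += params[dv * i + j] * np.exp(-0.5 * d @ sigma_inv @ d)
    return r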
