@@ -13,12 +13,13 @@ class Reward():
13
13
dv -> number of basis functions on the velocity dimension.
14
14
"""
15
15
def __init__ (self , dx , dv , env ):
16
+ sp = env .observation_space
16
17
self .dx = dx
17
18
self .dv = dv
18
- self .lx = env .low [0 ] # length of the position interval
19
- self .lv = env .low [1 ] # length of the velocity interval
20
- self .zx = env . high [0 ] # zero of the position interval
21
- self .zv = env . high [1 ] # zero of the velocity interval
19
+ self .lx = sp . high [ 0 ] - sp .low [0 ] # length of the position interval
20
+ self .lv = sp . high [ 1 ] - sp .low [1 ] # length of the velocity interval
21
+ self .zx = - sp . low [0 ] # zero of the position interval
22
+ self .zv = - sp . low [1 ] # zero of the velocity interval
22
23
# tune sigma according to the discretization
23
24
self .sigma_inv = inv (np .array ([[.05 , 0. ],
24
25
[0. , .0003 ]]))
@@ -32,7 +33,7 @@ def value(self, state, action):
32
33
33
34
def basis (self , state , idx ):
34
35
j = idx % self .dv
35
- i = (idx - j )/ self .dv
36
+ i = (idx - j )// self .dv
36
37
x , v = state
37
38
xi = i / (self .dx - 1 ) * self .lx - self .zx
38
39
vj = j / (self .dv - 1 ) * self .lv - self .zv
@@ -42,7 +43,7 @@ def basis(self, state, idx):
42
43
43
44
def partial_value(self, state, action, idx):
    """Return the contribution of a single basis function to the value.

    Args:
        state: forwarded unchanged to ``self.basis`` (which unpacks it
            as ``x, v``).
        action: not used by this method; kept so existing callers that
            pass it keep working.
        idx: flat index of the basis function; it also selects the
            matching weight in ``self.params``.

    Returns:
        ``self.params[idx] * self.basis(state, idx)``.
    """
    # `basis` accepts the flat index and derives the (i, j) grid
    # coordinates itself, so the index is passed through as-is instead of
    # being split here and handed over as two separate arguments.
    return self.params[idx] * self.basis(state, idx)
47
48
48
49
def partial_traj (self , traj , idx ):
@@ -71,12 +72,12 @@ def export_to_file(self, file_path):
71
72
def plot(self):
    """Show the reward as a 3-D surface over a position/velocity grid.

    The grid has ``self.dx`` nodes along position and ``self.dv`` nodes
    along velocity. Node coordinates span the same intervals that
    ``basis`` uses for its centres, i.e. from ``-self.zx`` to
    ``self.lx - self.zx`` and from ``-self.zv`` to ``self.lv - self.zv``.
    The reward is evaluated with ``self.value`` for action ``1`` at every
    node. Opens a Matplotlib window via ``plt.show()`` and returns
    nothing.
    """
    # Build the axes from the instance's own interval description rather
    # than from hard-coded extents, so the plot follows the environment
    # the object was constructed with.
    positions = np.linspace(-self.zx, self.lx - self.zx, self.dx)
    velocities = np.linspace(-self.zv, self.lv - self.zv, self.dv)

    # rewards[i, j] holds the value at (positions[i], velocities[j]).
    rewards = np.empty((self.dx, self.dv))
    for i, xi in enumerate(positions):
        for j, vj in enumerate(velocities):
            rewards[i, j] = self.value([xi, vj], 1)

    # meshgrid's default 'xy' indexing yields (dv, dx)-shaped arrays,
    # which is why the reward table is transposed for plot_surface.
    grid_x, grid_v = np.meshgrid(positions, velocities)

    # Create the 3-D axes explicitly; keyword arguments to gca() are not
    # accepted by recent Matplotlib releases.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(grid_x, grid_v, rewards.T, cmap=cm.coolwarm,
                    linewidth=0, antialiased=False)

    plt.show()
82
83
0 commit comments