@@ -127,8 +127,8 @@ <h3>Calculate advantages</h3>
127
127
128
128
</ div >
129
129
< div class ='code '>
130
- < div class ="highlight "> < pre > < span class ="lineno "> 58 </ span > < span class ="n "> advantages</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> zeros</ span > < span class ="p "> ((</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> n_workers</ span > < span class ="p "> ,</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> worker_steps</ span > < span class ="p "> ),</ span > < span class ="n "> dtype</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> float32</ span > < span class ="p "> )</ span >
131
- < span class ="lineno "> 59 </ span > < span class ="n "> last_advantage</ span > < span class ="o "> =</ span > < span class ="mi "> 0</ span > </ pre > </ div >
130
+ < div class ="highlight "> < pre > < span class ="lineno "> 59 </ span > < span class ="n "> advantages</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> zeros</ span > < span class ="p "> ((</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> n_workers</ span > < span class ="p "> ,</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> worker_steps</ span > < span class ="p "> ),</ span > < span class ="n "> dtype</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> float32</ span > < span class ="p "> )</ span >
131
+ < span class ="lineno "> 60 </ span > < span class ="n "> last_advantage</ span > < span class ="o "> =</ span > < span class ="mi "> 0</ span > </ pre > </ div >
132
132
</ div >
133
133
</ div >
134
134
< div class ='section ' id ='section-5 '>
@@ -140,9 +140,9 @@ <h3>Calculate advantages</h3>
140
140
141
141
</ div >
142
142
< div class ='code '>
143
- < div class ="highlight "> < pre > < span class ="lineno "> 62 </ span > < span class ="n "> last_value</ span > < span class ="o "> =</ span > < span class ="n "> values</ span > < span class ="p "> [:,</ span > < span class ="o "> -</ span > < span class ="mi "> 1</ span > < span class ="p "> ]</ span >
144
- < span class ="lineno "> 63 </ span >
145
- < span class ="lineno "> 64 </ span > < span class ="k "> for</ span > < span class ="n "> t</ span > < span class ="ow "> in</ span > < span class ="nb "> reversed</ span > < span class ="p "> (</ span > < span class ="nb "> range</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> worker_steps</ span > < span class ="p "> )):</ span > </ pre > </ div >
143
+ < div class ="highlight "> < pre > < span class ="lineno "> 63 </ span > < span class ="n "> last_value</ span > < span class ="o "> =</ span > < span class ="n "> values</ span > < span class ="p "> [:,</ span > < span class ="o "> -</ span > < span class ="mi "> 1</ span > < span class ="p "> ]</ span >
144
+ < span class ="lineno "> 64 </ span >
145
+ < span class ="lineno "> 65 </ span > < span class ="k "> for</ span > < span class ="n "> t</ span > < span class ="ow "> in</ span > < span class ="nb "> reversed</ span > < span class ="p "> (</ span > < span class ="nb "> range</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> worker_steps</ span > < span class ="p "> )):</ span > </ pre > </ div >
146
146
</ div >
147
147
</ div >
148
148
< div class ='section ' id ='section-6 '>
@@ -154,9 +154,9 @@ <h3>Calculate advantages</h3>
154
154
155
155
</ div >
156
156
< div class ='code '>
157
- < div class ="highlight "> < pre > < span class ="lineno "> 66 </ span > < span class ="n "> mask</ span > < span class ="o "> =</ span > < span class ="mf "> 1.0</ span > < span class ="o "> -</ span > < span class ="n "> done</ span > < span class ="p "> [:,</ span > < span class ="n "> t</ span > < span class ="p "> ]</ span >
158
- < span class ="lineno "> 67 </ span > < span class ="n "> last_value</ span > < span class ="o "> =</ span > < span class ="n "> last_value</ span > < span class ="o "> *</ span > < span class ="n "> mask</ span >
159
- < span class ="lineno "> 68 </ span > < span class ="n "> last_advantage</ span > < span class ="o "> =</ span > < span class ="n "> last_advantage</ span > < span class ="o "> *</ span > < span class ="n "> mask</ span > </ pre > </ div >
157
+ < div class ="highlight "> < pre > < span class ="lineno "> 67 </ span > < span class ="n "> mask</ span > < span class ="o "> =</ span > < span class ="mf "> 1.0</ span > < span class ="o "> -</ span > < span class ="n "> done</ span > < span class ="p "> [:,</ span > < span class ="n "> t</ span > < span class ="p "> ]</ span >
158
+ < span class ="lineno "> 68 </ span > < span class ="n "> last_value</ span > < span class ="o "> =</ span > < span class ="n "> last_value</ span > < span class ="o "> *</ span > < span class ="n "> mask</ span >
159
+ < span class ="lineno "> 69 </ span > < span class ="n "> last_advantage</ span > < span class ="o "> =</ span > < span class ="n "> last_advantage</ span > < span class ="o "> *</ span > < span class ="n "> mask</ span > </ pre > </ div >
160
160
</ div >
161
161
</ div >
162
162
< div class ='section ' id ='section-7 '>
@@ -168,7 +168,7 @@ <h3>Calculate advantages</h3>
168
168
169
169
</ div >
170
170
< div class ='code '>
171
- < div class ="highlight "> < pre > < span class ="lineno "> 70 </ span > < span class ="n "> delta</ span > < span class ="o "> =</ span > < span class ="n "> rewards</ span > < span class ="p "> [:,</ span > < span class ="n "> t</ span > < span class ="p "> ]</ span > < span class ="o "> +</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> gamma</ span > < span class ="o "> *</ span > < span class ="n "> last_value</ span > < span class ="o "> -</ span > < span class ="n "> values</ span > < span class ="p "> [:,</ span > < span class ="n "> t</ span > < span class ="p "> ]</ span > </ pre > </ div >
171
+ < div class ="highlight "> < pre > < span class ="lineno "> 71 </ span > < span class ="n "> delta</ span > < span class ="o "> =</ span > < span class ="n "> rewards</ span > < span class ="p "> [:,</ span > < span class ="n "> t</ span > < span class ="p "> ]</ span > < span class ="o "> +</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> gamma</ span > < span class ="o "> *</ span > < span class ="n "> last_value</ span > < span class ="o "> -</ span > < span class ="n "> values</ span > < span class ="p "> [:,</ span > < span class ="n "> t</ span > < span class ="p "> ]</ span > </ pre > </ div >
172
172
</ div >
173
173
</ div >
174
174
< div class ='section ' id ='section-8 '>
@@ -180,7 +180,7 @@ <h3>Calculate advantages</h3>
180
180
181
181
</ div >
182
182
< div class ='code '>
183
- < div class ="highlight "> < pre > < span class ="lineno "> 73 </ span > < span class ="n "> last_advantage</ span > < span class ="o "> =</ span > < span class ="n "> delta</ span > < span class ="o "> +</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> gamma</ span > < span class ="o "> *</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> lambda_</ span > < span class ="o "> *</ span > < span class ="n "> last_advantage</ span > </ pre > </ div >
183
+ < div class ="highlight "> < pre > < span class ="lineno "> 74 </ span > < span class ="n "> last_advantage</ span > < span class ="o "> =</ span > < span class ="n "> delta</ span > < span class ="o "> +</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> gamma</ span > < span class ="o "> *</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> lambda_</ span > < span class ="o "> *</ span > < span class ="n "> last_advantage</ span > </ pre > </ div >
184
184
</ div >
185
185
</ div >
186
186
< div class ='section ' id ='section-9 '>
@@ -192,11 +192,11 @@ <h3>Calculate advantages</h3>
192
192
193
193
</ div >
194
194
< div class ='code '>
195
- < div class ="highlight "> < pre > < span class ="lineno "> 82 </ span > < span class ="n "> advantages</ span > < span class ="p "> [:,</ span > < span class ="n "> t</ span > < span class ="p "> ]</ span > < span class ="o "> =</ span > < span class ="n "> last_advantage</ span >
196
- < span class ="lineno "> 83 </ span >
197
- < span class ="lineno "> 84 </ span > < span class ="n "> last_value</ span > < span class ="o "> =</ span > < span class ="n "> values</ span > < span class ="p "> [:,</ span > < span class ="n "> t</ span > < span class ="p "> ]</ span >
198
- < span class ="lineno "> 85 </ span >
199
- < span class ="lineno "> 86 </ span > < span class ="k "> return</ span > < span class ="n "> advantages</ span > </ pre > </ div >
195
+ < div class ="highlight "> < pre > < span class ="lineno "> 83 </ span > < span class ="n "> advantages</ span > < span class ="p "> [:,</ span > < span class ="n "> t</ span > < span class ="p "> ]</ span > < span class ="o "> =</ span > < span class ="n "> last_advantage</ span >
196
+ < span class ="lineno "> 84 </ span >
197
+ < span class ="lineno "> 85 </ span > < span class ="n "> last_value</ span > < span class ="o "> =</ span > < span class ="n "> values</ span > < span class ="p "> [:,</ span > < span class ="n "> t</ span > < span class ="p "> ]</ span >
198
+ < span class ="lineno "> 86 </ span >
199
+ < span class ="lineno "> 87 </ span > < span class ="k "> return</ span > < span class ="n "> advantages</ span > </ pre > </ div >
200
200
</ div >
201
201
</ div >
202
202
< div class ='footer '>
0 commit comments