Skip to content

Commit a8954c1

Browse files
committed
fix math align
1 parent 9b2dc5a commit a8954c1

File tree

34 files changed

+1188
-1093
lines changed

34 files changed

+1188
-1093
lines changed

docs/adaptive_computation/ponder_net/index.html

Lines changed: 71 additions & 71 deletions
Large diffs are not rendered by default.

docs/cfr/index.html

Lines changed: 142 additions & 142 deletions
Large diffs are not rendered by default.

docs/diffusion/ddpm/evaluate.html

Lines changed: 127 additions & 104 deletions
Large diffs are not rendered by default.

docs/diffusion/ddpm/unet.html

Lines changed: 154 additions & 153 deletions
Large diffs are not rendered by default.

docs/gan/cycle_gan/index.html

Lines changed: 108 additions & 106 deletions
Large diffs are not rendered by default.

docs/hypernetworks/hyper_lstm.html

Lines changed: 83 additions & 82 deletions
Large diffs are not rendered by default.

docs/lstm/index.html

Lines changed: 46 additions & 46 deletions
Large diffs are not rendered by default.

docs/normalization/batch_channel_norm/index.html

Lines changed: 31 additions & 30 deletions
Large diffs are not rendered by default.

docs/optimizers/amsgrad.html

Lines changed: 30 additions & 36 deletions
Large diffs are not rendered by default.

docs/optimizers/radam.html

Lines changed: 104 additions & 73 deletions
Large diffs are not rendered by default.

docs/recurrent_highway_networks/index.html

Lines changed: 33 additions & 31 deletions
Large diffs are not rendered by default.

docs/rl/dqn/index.html

Lines changed: 23 additions & 23 deletions
Large diffs are not rendered by default.

docs/rl/ppo/gae.html

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ <h3>Calculate advantages</h3>
127127

128128
</div>
129129
<div class='code'>
130-
<div class="highlight"><pre><span class="lineno">58</span> <span class="n">advantages</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="bp">self</span><span class="o">.</span><span class="n">n_workers</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">worker_steps</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">)</span>
131-
<span class="lineno">59</span> <span class="n">last_advantage</span> <span class="o">=</span> <span class="mi">0</span></pre></div>
130+
<div class="highlight"><pre><span class="lineno">59</span> <span class="n">advantages</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="bp">self</span><span class="o">.</span><span class="n">n_workers</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">worker_steps</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">)</span>
131+
<span class="lineno">60</span> <span class="n">last_advantage</span> <span class="o">=</span> <span class="mi">0</span></pre></div>
132132
</div>
133133
</div>
134134
<div class='section' id='section-5'>
@@ -140,9 +140,9 @@ <h3>Calculate advantages</h3>
140140

141141
</div>
142142
<div class='code'>
143-
<div class="highlight"><pre><span class="lineno">62</span> <span class="n">last_value</span> <span class="o">=</span> <span class="n">values</span><span class="p">[:,</span> <span class="o">-</span><span class="mi">1</span><span class="p">]</span>
144-
<span class="lineno">63</span>
145-
<span class="lineno">64</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="nb">reversed</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">worker_steps</span><span class="p">)):</span></pre></div>
143+
<div class="highlight"><pre><span class="lineno">63</span> <span class="n">last_value</span> <span class="o">=</span> <span class="n">values</span><span class="p">[:,</span> <span class="o">-</span><span class="mi">1</span><span class="p">]</span>
144+
<span class="lineno">64</span>
145+
<span class="lineno">65</span> <span class="k">for</span> <span class="n">t</span> <span class="ow">in</span> <span class="nb">reversed</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">worker_steps</span><span class="p">)):</span></pre></div>
146146
</div>
147147
</div>
148148
<div class='section' id='section-6'>
@@ -154,9 +154,9 @@ <h3>Calculate advantages</h3>
154154

155155
</div>
156156
<div class='code'>
157-
<div class="highlight"><pre><span class="lineno">66</span> <span class="n">mask</span> <span class="o">=</span> <span class="mf">1.0</span> <span class="o">-</span> <span class="n">done</span><span class="p">[:,</span> <span class="n">t</span><span class="p">]</span>
158-
<span class="lineno">67</span> <span class="n">last_value</span> <span class="o">=</span> <span class="n">last_value</span> <span class="o">*</span> <span class="n">mask</span>
159-
<span class="lineno">68</span> <span class="n">last_advantage</span> <span class="o">=</span> <span class="n">last_advantage</span> <span class="o">*</span> <span class="n">mask</span></pre></div>
157+
<div class="highlight"><pre><span class="lineno">67</span> <span class="n">mask</span> <span class="o">=</span> <span class="mf">1.0</span> <span class="o">-</span> <span class="n">done</span><span class="p">[:,</span> <span class="n">t</span><span class="p">]</span>
158+
<span class="lineno">68</span> <span class="n">last_value</span> <span class="o">=</span> <span class="n">last_value</span> <span class="o">*</span> <span class="n">mask</span>
159+
<span class="lineno">69</span> <span class="n">last_advantage</span> <span class="o">=</span> <span class="n">last_advantage</span> <span class="o">*</span> <span class="n">mask</span></pre></div>
160160
</div>
161161
</div>
162162
<div class='section' id='section-7'>
@@ -168,7 +168,7 @@ <h3>Calculate advantages</h3>
168168

169169
</div>
170170
<div class='code'>
171-
<div class="highlight"><pre><span class="lineno">70</span> <span class="n">delta</span> <span class="o">=</span> <span class="n">rewards</span><span class="p">[:,</span> <span class="n">t</span><span class="p">]</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">gamma</span> <span class="o">*</span> <span class="n">last_value</span> <span class="o">-</span> <span class="n">values</span><span class="p">[:,</span> <span class="n">t</span><span class="p">]</span></pre></div>
171+
<div class="highlight"><pre><span class="lineno">71</span> <span class="n">delta</span> <span class="o">=</span> <span class="n">rewards</span><span class="p">[:,</span> <span class="n">t</span><span class="p">]</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">gamma</span> <span class="o">*</span> <span class="n">last_value</span> <span class="o">-</span> <span class="n">values</span><span class="p">[:,</span> <span class="n">t</span><span class="p">]</span></pre></div>
172172
</div>
173173
</div>
174174
<div class='section' id='section-8'>
@@ -180,7 +180,7 @@ <h3>Calculate advantages</h3>
180180

181181
</div>
182182
<div class='code'>
183-
<div class="highlight"><pre><span class="lineno">73</span> <span class="n">last_advantage</span> <span class="o">=</span> <span class="n">delta</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">gamma</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">lambda_</span> <span class="o">*</span> <span class="n">last_advantage</span></pre></div>
183+
<div class="highlight"><pre><span class="lineno">74</span> <span class="n">last_advantage</span> <span class="o">=</span> <span class="n">delta</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">gamma</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">lambda_</span> <span class="o">*</span> <span class="n">last_advantage</span></pre></div>
184184
</div>
185185
</div>
186186
<div class='section' id='section-9'>
@@ -192,11 +192,11 @@ <h3>Calculate advantages</h3>
192192

193193
</div>
194194
<div class='code'>
195-
<div class="highlight"><pre><span class="lineno">82</span> <span class="n">advantages</span><span class="p">[:,</span> <span class="n">t</span><span class="p">]</span> <span class="o">=</span> <span class="n">last_advantage</span>
196-
<span class="lineno">83</span>
197-
<span class="lineno">84</span> <span class="n">last_value</span> <span class="o">=</span> <span class="n">values</span><span class="p">[:,</span> <span class="n">t</span><span class="p">]</span>
198-
<span class="lineno">85</span>
199-
<span class="lineno">86</span> <span class="k">return</span> <span class="n">advantages</span></pre></div>
195+
<div class="highlight"><pre><span class="lineno">83</span> <span class="n">advantages</span><span class="p">[:,</span> <span class="n">t</span><span class="p">]</span> <span class="o">=</span> <span class="n">last_advantage</span>
196+
<span class="lineno">84</span>
197+
<span class="lineno">85</span> <span class="n">last_value</span> <span class="o">=</span> <span class="n">values</span><span class="p">[:,</span> <span class="n">t</span><span class="p">]</span>
198+
<span class="lineno">86</span>
199+
<span class="lineno">87</span> <span class="k">return</span> <span class="n">advantages</span></pre></div>
200200
</div>
201201
</div>
202202
<div class='footer'>

docs/rl/ppo/index.html

Lines changed: 20 additions & 20 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)