Skip to content

Commit 853be19

Browse files
author
User
committed
comment
1 parent 8335b15 commit 853be19

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

rl/monte_carlo.py

+4
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ def play_game(grid, policy, max_steps=20):
4040
break
4141

4242
# calculate the returns by working backwards from the terminal state
43+
44+
# we want to return:
45+
# states = [s(0), s(1), ..., s(T-1)]
46+
# returns = [G(0), G(1), ..., G(T-1)]
4347
G = 0
4448
states_and_returns = []
4549
first = True

rl/monte_carlo2.py

+4
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ def play_game(grid, policy, max_steps=20):
4848
# note: there is no need to store the final terminal state
4949
s = next_s
5050

51+
# we want to return:
52+
# states = [s(0), s(1), ..., s(T-1)]
53+
# rewards = [R(1), R(2), ..., R(T)]
54+
5155
return states, rewards
5256

5357

0 commit comments

Comments
 (0)