We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 04a2c19, commit dc7713c (Copy full SHA for dc7713c)
rl/grid_world.py
@@ -169,7 +169,8 @@ def move(self, action):
169
next_state_probs = self.probs[(s, a)]
170
next_states = list(next_state_probs.keys())
171
next_probs = list(next_state_probs.values())
172
- s2 = np.random.choice(next_states, p=next_probs)
+ next_state_idx = np.random.choice(len(next_states), p=next_probs)
173
+ s2 = next_states[next_state_idx]
174
175
# update the current state
176
self.i, self.j = s2
0 commit comments