We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e6e59bc, commit 0cc226c (Copy full SHA for 0cc226c)
contents/1_command_line_reinforcement_learning/treasure_on_right.py
@@ -34,7 +34,7 @@ def build_q_table(n_states, actions):
34
def choose_action(state, q_table):
35
# This is how to choose an action
36
state_actions = q_table.iloc[state, :]
37
- if (np.random.uniform() > EPSILON) or (state_actions.all() == 0): # act non-greedy or state-action have no value
+ if (np.random.uniform() > EPSILON) or (not state_actions.any()): # act non-greedy or state-action have no value
38
action_name = np.random.choice(ACTIONS)
39
else: # act greedy
40
action_name = state_actions.idxmax() # replace argmax to idxmax as argmax means a different function in newer version of pandas
0 commit comments