We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents e6e59bc + f677ef2, commit ed018b4. Copy full SHA for ed018b4
contents/1_command_line_reinforcement_learning/treasure_on_right.py
@@ -34,7 +34,7 @@ def build_q_table(n_states, actions):
34
def choose_action(state, q_table):
35
# This is how to choose an action
36
state_actions = q_table.iloc[state, :]
37
- if (np.random.uniform() > EPSILON) or (state_actions.all() == 0): # act non-greedy or state-action have no value
+ if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()): # act non-greedy or state-action have no value
38
action_name = np.random.choice(ACTIONS)
39
else: # act greedy
40
action_name = state_actions.idxmax() # replace argmax to idxmax as argmax means a different function in newer version of pandas
0 commit comments