Skip to content

Commit 1faef74

Browse files
committed
fix selecting action
1 parent a3ee123 commit 1faef74

File tree

3 files changed

+6
-6
lines changed

3 files changed

+6
-6
lines changed

contents/2_Q_Learning_maze/RL_brain.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ def choose_action(self, observation):
2323
if np.random.uniform() < self.epsilon:
2424
# choose best action
2525
state_action = self.q_table.loc[observation, :]
26-
state_action = state_action.reindex(np.random.permutation(state_action.index)) # some actions have same value
27-
action = state_action.idxmax()
26+
# some actions may have the same value; randomly choose one of these actions
27+
action = np.random.choice(state_action[state_action == np.max(state_action)].index)
2828
else:
2929
# choose random action
3030
action = np.random.choice(self.actions)

contents/3_Sarsa_maze/RL_brain.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ def choose_action(self, observation):
3535
if np.random.rand() < self.epsilon:
3636
# choose best action
3737
state_action = self.q_table.loc[observation, :]
38-
state_action = state_action.reindex(np.random.permutation(state_action.index)) # some actions have same value
39-
action = state_action.idxmax()
38+
# some actions may have the same value; randomly choose one of these actions
39+
action = np.random.choice(state_action[state_action == np.max(state_action)].index)
4040
else:
4141
# choose random action
4242
action = np.random.choice(self.actions)

contents/4_Sarsa_lambda_maze/RL_brain.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ def choose_action(self, observation):
3535
if np.random.rand() < self.epsilon:
3636
# choose best action
3737
state_action = self.q_table.loc[observation, :]
38-
state_action = state_action.reindex(np.random.permutation(state_action.index)) # some actions have same value
39-
action = state_action.idxmax()
38+
# some actions may have the same value; randomly choose one of these actions
39+
action = np.random.choice(state_action[state_action == np.max(state_action)].index)
4040
else:
4141
# choose random action
4242
action = np.random.choice(self.actions)

0 commit comments

Comments
 (0)