File tree Expand file tree Collapse file tree 3 files changed +6
-6
lines changed Expand file tree Collapse file tree 3 files changed +6
-6
lines changed Original file line number Diff line number Diff line change @@ -23,8 +23,8 @@ def choose_action(self, observation):
23
23
if np .random .uniform () < self .epsilon :
24
24
# choose best action
25
25
state_action = self .q_table .loc [observation , :]
26
- state_action = state_action . reindex ( np . random . permutation ( state_action . index )) # some actions have same value
27
- action = state_action . idxmax ( )
26
+ # several actions may share the same maximum value, so randomly choose one of them
27
+ action = np . random . choice ( state_action [ state_action == np . max ( state_action )]. index )
28
28
else :
29
29
# choose random action
30
30
action = np .random .choice (self .actions )
Original file line number Diff line number Diff line change @@ -35,8 +35,8 @@ def choose_action(self, observation):
35
35
if np .random .rand () < self .epsilon :
36
36
# choose best action
37
37
state_action = self .q_table .loc [observation , :]
38
- state_action = state_action . reindex ( np . random . permutation ( state_action . index )) # some actions have same value
39
- action = state_action . idxmax ( )
38
+ # several actions may share the same maximum value, so randomly choose one of them
39
+ action = np . random . choice ( state_action [ state_action == np . max ( state_action )]. index )
40
40
else :
41
41
# choose random action
42
42
action = np .random .choice (self .actions )
Original file line number Diff line number Diff line change @@ -35,8 +35,8 @@ def choose_action(self, observation):
35
35
if np .random .rand () < self .epsilon :
36
36
# choose best action
37
37
state_action = self .q_table .loc [observation , :]
38
- state_action = state_action . reindex ( np . random . permutation ( state_action . index )) # some actions have same value
39
- action = state_action . idxmax ( )
38
+ # several actions may share the same maximum value, so randomly choose one of them
39
+ action = np . random . choice ( state_action [ state_action == np . max ( state_action )]. index )
40
40
else :
41
41
# choose random action
42
42
action = np .random .choice (self .actions )
You can’t perform that action at this time.
0 commit comments