|
17 | 17 |
|
18 | 18 | GEAR = 2
|
19 | 19 |
|
| 20 | +reducer_func = lambda x: ( x * .95 // 0.001) * 0.001 |
| 21 | + |
20 | 22 | ALL_STATES = ['light', 'oncoming', 'left', 'right']
|
21 | 23 | ALL_ACTIONS = [None, 'forward', 'left', 'right']
|
22 | 24 |
|
@@ -52,31 +54,37 @@ def reset(self, destination=None):
|
52 | 54 | * Epsilon(exploration factor): higher the randomness higher the exploration
|
53 | 55 | '''
|
54 | 56 | self.planner.route_to(destination)
|
55 |
| - # pdb.set_trace() |
56 |
| - # TODO: Prepare for a new trip; reset any variables here, if required |
| 57 | + random.shuffle(ALL_ACTIONS) |
57 | 58 | self.prev_state = None
|
58 | 59 | self.prev_reward = None
|
59 | 60 | self.prev_action = None
|
60 | 61 |
|
| 62 | + # learning rate |
61 | 63 | if self.counter < 50:
|
62 | 64 | self.alpha -= 0.01
|
63 | 65 | # self.alpha = (self.alpha // 0.0001) * 0.0001
|
64 | 66 |
|
65 |
| - if self.counter < 0: |
| 67 | + # long term focus |
| 68 | + if self.counter < 20: |
66 | 69 | self.gamma += 0.01
|
67 | 70 |
|
| 71 | + # randomness |
68 | 72 | if self.epsilon < 0:
|
69 | 73 | # self.epsilon -= 0.01
|
70 |
| - self.epsilon = (self.epsilon // 0.0001) * 0.0001 |
| 74 | + self.epsilon = (self.epsilon * .9 // 0.0001) * 0.0001 |
71 | 75 |
|
| 76 | + if self.counter >= 10 * 98: |
| 77 | + pprint(self.Q) |
| 78 | + # pdb.set_trace() |
72 | 79 |
|
73 |
| - self.counter += 50 |
| 80 | + self.counter += 10 |
74 | 81 |
|
def get_q_val(self, state, action):
    """Return the Q-value for (state, action), seeding unseen pairs.

    Bug fixes vs. the original:
    * The ``state`` parameter was ignored — the lookup used ``self.state``,
      so callers passing an explicit state got the wrong entry.
    * A missing pair was stored as 0.05 but the method returned 0, so the
      table and the caller disagreed about the default.

    Unseen (state, action) pairs are initialised to 0.05 — optimism in the
    face of uncertainty encourages early exploration — and that same value
    is returned.
    """
    # setdefault does the lookup-or-insert in one step (EAFP, no KeyError
    # handling needed) and guarantees stored value == returned value.
    return self.Q.setdefault((state, action), 0.05)
|
81 | 89 |
|
82 | 90 | def best_q_action(self, s):
|
@@ -158,12 +166,19 @@ def run():
|
158 | 166 | e.set_primary_agent(a, enforce_deadline=True) # specify agent to track
|
159 | 167 | # NOTE: You can set enforce_deadline=False while debugging to allow longer trials
|
160 | 168 |
|
161 |
| - # Now simulate it |
162 |
| - sim = Simulator(e, update_delay=0, display=False) # create simulator (uses pygame when display=True, if available) |
163 |
| - # NOTE: To speed up simulation, reduce update_delay and/or set display=False |
| 169 | + show = False |
| 170 | + # NOTE: To speed up simulation, set show = False. |
| 171 | + # NOTE: To show the GUI, set show = True |
| 172 | + |
| 173 | + if show: |
| 174 | + # Now simulate it |
| 175 | + sim = Simulator(e, update_delay=0.5, display=True) # create simulator (uses pygame when display=True, if available) |
| 176 | + sim.run(n_trials=100) # run for a specified number of trials |
| 177 | + # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line |
| 178 | + else: |
| 179 | + sim = Simulator(e, update_delay=0.0, display=False) |
| 180 | + sim.run(n_trials=100) |
164 | 181 |
|
165 |
| - sim.run(n_trials=400) # run for a specified number of trials |
166 |
| - # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line |
167 | 182 |
|
168 | 183 |
|
169 | 184 | if __name__ == '__main__':
|
|
0 commit comments