
Commit 4d52778

Smart Cab: Adding a smart cab log analyser and first mission report file
1 parent 5a6e1f9 commit 4d52778

2 files changed: 374 additions, 0 deletions
Lines changed: 168 additions & 0 deletions
@@ -0,0 +1,168 @@
'''
Welcome to the Smart Cab Log Analyser !!!

This is a simple script to do ETL on Agent.py log/output data and analyse the performance
of the agent's learning. For each trial it extracts:

* trial    : success or failure
* deadline : last deadline value
* reward   : last reward value

You can redirect the trial logs printed by the smartcab agent to a log file and provide
that file as input to the analyser.

Note:
* The learning-update output format is expected to be as below; you need to add
  EXPECTED_REWARD to the agent's update message:

LearningAgent.update(): deadline = 30, inputs = {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, action = None, expected_reward = 0, reward = 0.0
'''

import sys

from re import findall as re_findall

import pandas as pd

# default log file to analyse when total_stats() is called without an argument
FILE = 'smartcab/test_log100'


def fetch_data(filename):
    data = open(filename).readlines()
    return data


def fetch_value(inp_str):
    # e.g. "deadline = 30" -> 30.0
    return float(inp_str.split('=')[-1])


def check_environment_reset(env_reset):
    # parse the start, destination and deadline values out of an Environment.reset() log line
    data = []
    start, end, deadline = '', '', ''
    if env_reset.startswith('Environment.reset()'):
        data = re_findall(r'[\w.]+', env_reset)
        for i, each in enumerate(data):
            if each == 'start':
                start = map(int, [data[i + 1], data[i + 2]])
            elif each == 'destination':
                end = map(int, [data[i + 1], data[i + 2]])
            elif each == 'deadline':
                deadline = int(data[i + 1])
    return start, end, deadline


def check_learning_update(learning_update):
    # parse deadline, expected_reward and reward out of a LearningAgent.update() log line
    deadline, expected_reward, reward = 0, 0, 0
    data = []
    if learning_update.startswith('LearningAgent.update()'):
        data = re_findall(r'[\w.]+', learning_update)
        for i, each in enumerate(data):
            if each == 'deadline':
                deadline = data[i + 1]
            elif each == 'expected_reward':
                expected_reward = data[i + 1]
            elif each == 'reward':
                reward = data[i + 1]
    return deadline, expected_reward, reward


def check_learning_update_old(learning_update):
    # older, position-based variant of the parser
    deadline, expected_reward, reward = 0, 0, 0
    if learning_update.startswith('LearningAgent.update()'):
        learning_update = learning_update[24:].split(', ')
        # values
        deadline = fetch_value(learning_update[0])
        expected_reward = fetch_value(learning_update[-2])
        reward = fetch_value(learning_update[-1])
    return deadline, expected_reward, reward


def success_check(data):
    '''
    Sequentially parses the log and collects data as it appears.
    '''
    all_outcomes = []
    all_trails = []
    all_deadlines = []
    all_expected_rewards = []
    all_rewards = []
    all_start = []
    all_destinations = []
    all_main_deadlines = []
    # state for detecting the end of a trial
    trail_last_update = False
    tmp_success = -1
    counter = 0
    reached_msg, aborted_msg = 'Primary agent has reached destination', 'Trial aborted'

    for i, each in enumerate(data):
        if each.startswith('Environment.reset()'):
            counter += 1
            start_point, destination_point, main_deadline = check_environment_reset(each)

        if each.startswith('LearningAgent.update()'):
            deadline, expected_reward, reward = check_learning_update(each)
            # record this learning step
            all_trails.append(counter)
            all_outcomes.append(-1)
            all_rewards.append(reward)
            all_expected_rewards.append(expected_reward)
            all_start.append(start_point)
            all_destinations.append(destination_point)
            all_deadlines.append(deadline)
            all_main_deadlines.append(main_deadline)

        if trail_last_update:
            trail_last_update = False
            all_outcomes[-1] = tmp_success
            tmp_success = -1
        # trial end check
        reached_chk = reached_msg in each
        aborted_chk = aborted_msg in each
        # trial ended
        if reached_chk or aborted_chk:
            trail_last_update = True
            if reached_chk:
                tmp_success = 1
            else:
                tmp_success = 0
    ret_dict = {
        'all_trails': all_trails,
        'all_outcomes': all_outcomes,
        'all_rewards': all_rewards,
        'all_deadlines': all_deadlines,
        'all_expected_rewards': all_expected_rewards,
        'all_start': all_start,
        'all_destinations': all_destinations,
        'all_main_deadline': all_main_deadlines
    }
    return ret_dict


def total_stats(filename=FILE):
    data = fetch_data(filename)
    game = success_check(data)
    game_stats = pd.DataFrame.from_dict(game)

    for col in [u'all_deadlines', u'all_expected_rewards', u'all_trails',
                u'all_main_deadline', u'all_outcomes', u'all_rewards',  # u'all_start', u'all_destinations'
                ]:
        game_stats[col] = pd.to_numeric(game_stats[col])

    game_stats['Q_pred'] = game_stats.all_expected_rewards - game_stats.all_outcomes
    game_stats['steps'] = game_stats.all_main_deadline - game_stats.all_deadlines
    game_stats['avg_steps'] = game_stats.all_rewards / game_stats.steps

    #
    # Checking how many reached destination
    #
    rewarded_deadlines = (game_stats.all_rewards >= 12) & (game_stats.all_deadlines >= 0)
    print '\nNo.of Successfully Trips is: %s' % game_stats[rewarded_deadlines].all_rewards.count()
    print 'Total reward sum: %s' % game_stats.all_rewards.sum()
    print 'Total time saved: %s' % int(game_stats[['steps']][rewarded_deadlines].sum())


if __name__ == '__main__':
    if len(sys.argv) > 1:
        filename = str(sys.argv[1])
        total_stats(filename)
    else:
        total_stats('q_log02')
Lines changed: 206 additions & 0 deletions
@@ -0,0 +1,206 @@
Smart Cab - Agent


QUESTION: Observe what you see with the agent's behavior as it takes random actions. Does the smartcab eventually make it to the destination? Are there any other interesting observations to note?

Yes, even while taking random actions the smartcab eventually makes it to the destination. I believe this is due to the lack of deadline enforcement.

Some expected observations:
* It takes a lot more time and steps than the Q-learning smartcab.
* It is prone to a lot of accidents.


QUESTION: What states have you identified that are appropriate for modeling the smartcab and environment? Why do you believe each of these states to be appropriate for this problem?

OPTIONAL: How many states in total exist for the smartcab in this environment? Does this number seem reasonable given that the goal of Q-Learning is to learn and make informed decisions about each state? Why or why not?

In my program, a state consists of the following:
* next_waypoint - (None, left, forward, right)
* inputs - light - (Green, Red)
* inputs - left - (None, left, forward, right)
* inputs - forward - (None, left, forward, right)
* inputs - right - (None, left, forward, right)

Total states identified = 4 * 2 * 4 * 4 * 4 = 512.
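
A minimal sketch of how such a state key could be built from the planner's waypoint and the sensed inputs (a sketch only: agent.py is not part of this commit, and the names below are assumptions):

def build_state(next_waypoint, inputs):
    # (waypoint, light, left, oncoming, right) -> hashable key for the Q-table;
    # the 'oncoming' input is what the list above calls the 'forward' input
    return (next_waypoint,
            inputs['light'],
            inputs['left'],
            inputs['oncoming'],
            inputs['right'])

# 4 waypoint values * 2 light values * 4 * 4 * 4 traffic values = 512 possible states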

I believe it is a natural instinct to check for safety and to be cautious/alert when we are close to danger. While driving, I see that the main way of getting hurt (an accident) is when other drivers do not know which direction I am heading and fail to follow the traffic signals. Of course, if other drivers ignore or misinterpret the traffic rules we might still end up in an incident, so for this case study let us assume that all the other drivers are well-trained algorithms and let us do our own part of the safety control :)

So at an intersection, checking the left, right and forward directions is important. In the same way, the light input is important to follow so that we do not cause an accident.

Now that we have all the basic details for safety and precaution, we only need one more basic detail: where to go (next_waypoint).

In total, these 5 inputs are a must to keep our driver and my customer (you) safe and to reach the destination. So if a Q-learning machine is to do my driver's job, it needs to know what I know, right?


QUESTION: What changes do you notice in the agent's behavior when compared to the basic driving agent when random actions were always taken? Why is this behavior occurring?

Observations from my test run with Q-learning:
* 99 of the trials finished before the deadline.
* Q-learning was able to complete the 100 trial runs faster than the random logic.


QUESTION: Report the different values for the parameters tuned in your basic implementation of Q-Learning. For which set of parameters does the agent perform best? How well does the final driving agent perform?

QUESTION: Does your agent get close to finding an optimal policy, i.e. reach the destination in the minimum possible time, and not incur any penalties? How would you describe an optimal policy for this problem?

Before we look for an optimal policy, let us list some key requirements for a (smart) cab driver:

1. The driver follows all traffic rules ==> the number of negative-reward records should be low. We would prefer zero, but learning requires exploring, and failing is part of the learning curve.
2. The driver's negative rewards should reduce as the trials increase.
3. The driver reaches the destination.
4. The driver reaches the destination early, if possible.

So I have the parameters alpha, gamma and epsilon, which control the learning behaviour of Q, and they get updated in two places: once when the agent is initialised, and then during each reset of the agent, when we increment or decrement these initial values.

If I/you were that smart program, it would be wise to try different combinations at the start, with a high learning rate; as you gain experience and have learned the basics, you would shift towards better performance with less randomness and a smaller learning rate.

So I kept the learning rate (self.alpha) higher than 0.5 and experimented with gamma and epsilon. The combination that gave the most satisfactory scores is this:

# initial settings
self.alpha = 1.0
self.gamma = 0.75
self.epsilon = 0.4

# improvements during reset
if self.alpha > .55:
    self.alpha -= 0.01
    self.gamma += 0.02
    self.epsilon += 0.01
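
For reference, a minimal sketch of how these three parameters could drive the action choice and the Q-table update (a sketch only; my actual agent.py is not part of this commit). It assumes epsilon is the probability of exploiting the best known action, which matches the idea above that a growing epsilon means less randomness:

import random

def choose_action(Q, state, actions, epsilon):
    # with probability epsilon exploit the best known action, otherwise explore
    if random.random() < epsilon:
        return max(actions, key=lambda a: Q.get((state, a), 0.0))
    return random.choice(actions)

def q_update(Q, state, action, reward, next_state, actions, alpha, gamma):
    # standard tabular Q-learning update:
    # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))
    best_next = max(Q.get((next_state, a), 0.0) for a in actions)
    Q[(state, action)] = (1 - alpha) * Q.get((state, action), 0.0) + alpha * (reward + gamma * best_next)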


Other experiments I have tried:

Checking different initial values with the following reset updates:

self.alpha -= 0.01
self.gamma += 0.01
self.epsilon += 0.01

Case 0:

self.alpha = 1.0
self.gamma = 0.1
self.epsilon = 0.1

No.of Successfully Trips is: 13
Total reward sum: 586.0

No.of Successfully Trips is: 38
Total reward sum: 1205.5


Case 1: Conclusion: with the other factors held constant, there is an increase in score if the initial gamma is higher (>0.1).

No.of Successfully Trips is: 61
Total reward sum: 1734.5

No.of Successfully Trips is: 65
Total reward sum: 1736.0

self.alpha = 1.0
self.gamma = 0.5
self.epsilon = 0.1

Case 2: Conclusion: with the other factors held constant, there is an increase in score if the initial epsilon is higher (>0.1).

No.of Successfully Trips is: 42
Total reward sum: 1290.0

No.of Successfully Trips is: 47
Total reward sum: 1381.5

self.alpha = 1.0
self.gamma = 0.1
self.epsilon = 0.5


Case 3:

self.alpha = 1.5
self.gamma = 0.1
self.epsilon = 0.1

No.of Successfully Trips is: 95
Total reward sum: 2485.5

Case 4:

self.alpha = 1.5
self.gamma = 0.75
self.epsilon = 0.5

No.of Successfully Trips is: 99
Total reward sum: 2336.5


Case 5:

self.alpha = 1.0
self.gamma = 0.75
self.epsilon = 0.4

if self.alpha > .55:
    self.alpha -= 0.01
    self.gamma += 0.01
    self.epsilon += 0.01

No.of Successfully Trips is: 99
Total reward sum: 2345.5

No.of Successfully Trips is: 100
Total reward sum: 2299.0

No.of Successfully Trips is: 98
Total reward sum: 2310.0

No.of Successfully Trips is: 99
Total reward sum: 2420.5

Case 6:

self.alpha = 1.0
self.gamma = 0.75
self.epsilon = 0.4

if self.alpha > .65:
    self.alpha -= 0.01
    self.gamma += 0.02
    self.epsilon += 0.01

No.of Successfully Trips is: 99
Total reward sum: 2321.5

No.of Successfully Trips is: 100
Total reward sum: 2285.5

No.of Successfully Trips is: 100
Total reward sum: 2392.0


Case 7:

self.alpha = 1.0
self.gamma = 0.75
self.epsilon = 0.4

if self.alpha > .55:
    self.alpha -= 0.01
    self.gamma += 0.02
    self.epsilon += 0.01

No.of Successfully Trips is: 100
Total reward sum: 2366.5

No.of Successfully Trips is: 99
Total reward sum: 2280.5

No.of Successfully Trips is: 99
Total reward sum: 2367.5


How to use
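
A minimal sketch, assuming the analyser script in this commit is saved as log_analyser.py (the filename and paths below are assumptions; they are not shown on this page):

1. Capture the smartcab agent's console output into a log file, e.g. python smartcab/agent.py > smartcab/test_log100. The agent must print the expected_reward field described in the analyser's docstring.
2. Run the analyser on that log: python log_analyser.py smartcab/test_log100. With no argument the script falls back to the log file 'q_log02'.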