
Commit 4d52778

Smart Cab: Adding a smart cab log analyser and first mission report file
1 parent 5a6e1f9 commit 4d52778

2 files changed: 374 additions, 0 deletions
Lines changed: 168 additions & 0 deletions
@@ -0,0 +1,168 @@
'''
Welcome to the Smart Cab Log Analyser !!!

This is a simple script to do ETL on Agent.py log/output data and analyse the performance
of the agent's learning. For each trial it extracts:

* trial    : success or failure
* deadline : last deadline value
* reward   : last reward value

You can redirect the trial logs printed by the smartcab agent to a log file and provide
that file as input to the analyser.

Note:
* The learning-update output format is expected to be as below; you need to add
  EXPECTED_REWARD to the agent's update message:

LearningAgent.update(): deadline = 30, inputs = {'light': 'red', 'oncoming': None, 'right': None, 'left': None}, action = None, expected_reward = 0, reward = 0.0
'''

import sys

from re import findall as re_findall

import pandas as pd

# default log file to analyse when total_stats() is called without an argument
FILE = 'smartcab/test_log100'


def fetch_data(filename):
    data = open(filename).readlines()
    return data


def fetch_value(inp_str):
    # e.g. "deadline = 30" -> 30.0
    return float(inp_str.split('=')[-1])


def check_environment_reset(env_reset):
    # parse the start, destination and deadline values out of an Environment.reset() log line
    data = []
    start, end, deadline = '', '', ''
    if env_reset.startswith('Environment.reset()'):
        data = re_findall(r'[\w.]+', env_reset)
        for i, each in enumerate(data):
            if each == 'start':
                start = map(int, [data[i + 1], data[i + 2]])
            elif each == 'destination':
                end = map(int, [data[i + 1], data[i + 2]])
            elif each == 'deadline':
                deadline = int(data[i + 1])
    return start, end, deadline


def check_learning_update(learning_update):
    # parse deadline, expected_reward and reward out of a LearningAgent.update() log line
    deadline, expected_reward, reward = 0, 0, 0
    data = []
    if learning_update.startswith('LearningAgent.update()'):
        data = re_findall(r'[\w.]+', learning_update)
        for i, each in enumerate(data):
            if each == 'deadline':
                deadline = data[i + 1]
            elif each == 'expected_reward':
                expected_reward = data[i + 1]
            elif each == 'reward':
                reward = data[i + 1]
    return deadline, expected_reward, reward


def check_learning_update_old(learning_update):
    # older, position-based variant of the parser
    deadline, expected_reward, reward = 0, 0, 0
    if learning_update.startswith('LearningAgent.update()'):
        learning_update = learning_update[24:].split(', ')
        # values
        deadline = fetch_value(learning_update[0])
        expected_reward = fetch_value(learning_update[-2])
        reward = fetch_value(learning_update[-1])
    return deadline, expected_reward, reward


def success_check(data):
    '''
    Sequentially parses the log and collects data as it appears.
    '''
    all_outcomes = []
    all_trails = []
    all_deadlines = []
    all_expected_rewards = []
    all_rewards = []
    all_start = []
    all_destinations = []
    all_main_deadlines = []
    # state for detecting the end of a trial
    trail_last_update = False
    tmp_success = -1
    counter = 0
    reached_msg, aborted_msg = 'Primary agent has reached destination', 'Trial aborted'

    for i, each in enumerate(data):
        if each.startswith('Environment.reset()'):
            counter += 1
            start_point, destination_point, main_deadline = check_environment_reset(each)

        if each.startswith('LearningAgent.update()'):
            deadline, expected_reward, reward = check_learning_update(each)
            # record this learning step
            all_trails.append(counter)
            all_outcomes.append(-1)
            all_rewards.append(reward)
            all_expected_rewards.append(expected_reward)
            all_start.append(start_point)
            all_destinations.append(destination_point)
            all_deadlines.append(deadline)
            all_main_deadlines.append(main_deadline)

        if trail_last_update:
            trail_last_update = False
            all_outcomes[-1] = tmp_success
            tmp_success = -1
        # trial end check
        reached_chk = reached_msg in each
        aborted_chk = aborted_msg in each
        # trial ended
        if reached_chk or aborted_chk:
            trail_last_update = True
            if reached_chk:
                tmp_success = 1
            else:
                tmp_success = 0
    ret_dict = {
        'all_trails': all_trails,
        'all_outcomes': all_outcomes,
        'all_rewards': all_rewards,
        'all_deadlines': all_deadlines,
        'all_expected_rewards': all_expected_rewards,
        'all_start': all_start,
        'all_destinations': all_destinations,
        'all_main_deadline': all_main_deadlines
    }
    return ret_dict


def total_stats(filename=FILE):
    data = fetch_data(filename)
    game = success_check(data)
    game_stats = pd.DataFrame.from_dict(game)

    for col in [u'all_deadlines', u'all_expected_rewards', u'all_trails',
                u'all_main_deadline', u'all_outcomes', u'all_rewards',  # u'all_start', u'all_destinations'
                ]:
        game_stats[col] = pd.to_numeric(game_stats[col])

    game_stats['Q_pred'] = game_stats.all_expected_rewards - game_stats.all_outcomes
    game_stats['steps'] = game_stats.all_main_deadline - game_stats.all_deadlines
    game_stats['avg_steps'] = game_stats.all_rewards / game_stats.steps

    #
    # Checking how many reached destination
    #
    rewarded_deadlines = (game_stats.all_rewards >= 12) & (game_stats.all_deadlines >= 0)
    print '\nNo.of Successfully Trips is: %s' % game_stats[rewarded_deadlines].all_rewards.count()
    print 'Total reward sum: %s' % game_stats.all_rewards.sum()
    print 'Total time saved: %s' % int(game_stats[['steps']][rewarded_deadlines].sum())


if __name__ == '__main__':
    if len(sys.argv) > 1:
        filename = str(sys.argv[1])
        total_stats(filename)
    else:
        total_stats('q_log02')
Lines changed: 206 additions & 0 deletions
@@ -0,0 +1,206 @@
Smart Cab - Agent


QUESTION: Observe what you see with the agent's behavior as it takes random actions. Does the smartcab eventually make it to the destination? Are there any other interesting observations to note?

Yes, even while taking random actions the smartcab eventually makes it to the destination. I believe this is due to the lack of deadline enforcement.

Some expected observations:
* It takes a lot more time and steps than the Q-learning smartcab.
* It is prone to a lot of accidents.


QUESTION: What states have you identified that are appropriate for modeling the smartcab and environment? Why do you believe each of these states to be appropriate for this problem?

OPTIONAL: How many states in total exist for the smartcab in this environment? Does this number seem reasonable given that the goal of Q-Learning is to learn and make informed decisions about each state? Why or why not?

In my program, a state consists of the following:
* next_waypoint - (None, left, forward, right)
* inputs - light - (Green, Red)
* inputs - left - (None, left, forward, right)
* inputs - forward - (None, left, forward, right)
* inputs - right - (None, left, forward, right)

Total states identified = 4 * 2 * 4 * 4 * 4 = 512.
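
A minimal sketch of how such a state key could be built from the planner's waypoint and the sensed inputs (a sketch only: agent.py is not part of this commit, and the names below are assumptions):

def build_state(next_waypoint, inputs):
    # (waypoint, light, left, oncoming, right) -> hashable key for the Q-table;
    # the 'oncoming' input is what the list above calls the 'forward' input
    return (next_waypoint,
            inputs['light'],
            inputs['left'],
            inputs['oncoming'],
            inputs['right'])

# 4 waypoint values * 2 light values * 4 * 4 * 4 traffic values = 512 possible states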

I believe it is a natural instinct to check for safety and to be cautious/alert when we are close to danger. While driving, I see that the main way of getting hurt (an accident) is when other drivers do not know which direction I am heading and fail to follow the traffic signals. Of course, if other drivers ignore or misinterpret the traffic rules we might still end up in an incident, so for this case study let us assume that all the other drivers are well-trained algorithms and let us do our own part of the safety control :)

So at an intersection, checking the left, right and forward directions is important. In the same way, the light input is important to follow so that we do not cause an accident.

Now that we have all the basic details for safety and precaution, we only need one more basic detail: where to go (next_waypoint).

In total, these 5 inputs are a must to keep our driver and my customer (you) safe and to reach the destination. So if a Q-learning machine is to do my driver's job, it needs to know what I know, right?


QUESTION: What changes do you notice in the agent's behavior when compared to the basic driving agent when random actions were always taken? Why is this behavior occurring?

Observations from my test run with Q-learning:
* 99 of the trials finished before the deadline.
* Q-learning was able to complete the 100 trial runs faster than the random logic.


QUESTION: Report the different values for the parameters tuned in your basic implementation of Q-Learning. For which set of parameters does the agent perform best? How well does the final driving agent perform?

QUESTION: Does your agent get close to finding an optimal policy, i.e. reach the destination in the minimum possible time, and not incur any penalties? How would you describe an optimal policy for this problem?

Before we look for an optimal policy, let us list some key requirements for a (smart) cab driver:

1. The driver follows all traffic rules ==> the number of negative-reward records should be low. We would prefer zero, but learning requires exploring, and failing is part of the learning curve.
2. The driver's negative rewards should reduce as the trials increase.
3. The driver reaches the destination.
4. The driver reaches the destination early, if possible.

So I have the parameters alpha, gamma and epsilon, which control the learning behaviour of Q, and they get updated in two places: once when the agent is initialised, and then during each reset of the agent, when we increment or decrement these initial values.

If I/you were that smart program, it would be wise to try different combinations at the start, with a high learning rate; as you gain experience and have learned the basics, you would shift towards better performance with less randomness and a smaller learning rate.

So I kept the learning rate (self.alpha) higher than 0.5 and experimented with gamma and epsilon. The combination that gave the most satisfactory scores is this:

# initial settings
self.alpha = 1.0
self.gamma = 0.75
self.epsilon = 0.4

# improvements during reset
if self.alpha > .55:
    self.alpha -= 0.01
    self.gamma += 0.02
    self.epsilon += 0.01
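
For reference, a minimal sketch of how these three parameters could drive the action choice and the Q-table update (a sketch only; my actual agent.py is not part of this commit). It assumes epsilon is the probability of exploiting the best known action, which matches the idea above that a growing epsilon means less randomness:

import random

def choose_action(Q, state, actions, epsilon):
    # with probability epsilon exploit the best known action, otherwise explore
    if random.random() < epsilon:
        return max(actions, key=lambda a: Q.get((state, a), 0.0))
    return random.choice(actions)

def q_update(Q, state, action, reward, next_state, actions, alpha, gamma):
    # standard tabular Q-learning update:
    # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))
    best_next = max(Q.get((next_state, a), 0.0) for a in actions)
    Q[(state, action)] = (1 - alpha) * Q.get((state, action), 0.0) + alpha * (reward + gamma * best_next)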


Other experiments I have tried:

Checking different initial values with the following reset updates:

self.alpha -= 0.01
self.gamma += 0.01
self.epsilon += 0.01

Case 0:

self.alpha = 1.0
self.gamma = 0.1
self.epsilon = 0.1

No.of Successfully Trips is: 13
Total reward sum: 586.0

No.of Successfully Trips is: 38
Total reward sum: 1205.5


Case 1: Conclusion: with the other factors held constant, there is an increase in score if the initial gamma is higher (>0.1).

No.of Successfully Trips is: 61
Total reward sum: 1734.5

No.of Successfully Trips is: 65
Total reward sum: 1736.0

self.alpha = 1.0
self.gamma = 0.5
self.epsilon = 0.1

Case 2: Conclusion: with the other factors held constant, there is an increase in score if the initial epsilon is higher (>0.1).

No.of Successfully Trips is: 42
Total reward sum: 1290.0

No.of Successfully Trips is: 47
Total reward sum: 1381.5

self.alpha = 1.0
self.gamma = 0.1
self.epsilon = 0.5


Case 3:

self.alpha = 1.5
self.gamma = 0.1
self.epsilon = 0.1

No.of Successfully Trips is: 95
Total reward sum: 2485.5

Case 4:

self.alpha = 1.5
self.gamma = 0.75
self.epsilon = 0.5

No.of Successfully Trips is: 99
Total reward sum: 2336.5


Case 5:

self.alpha = 1.0
self.gamma = 0.75
self.epsilon = 0.4

if self.alpha > .55:
    self.alpha -= 0.01
    self.gamma += 0.01
    self.epsilon += 0.01

No.of Successfully Trips is: 99
Total reward sum: 2345.5

No.of Successfully Trips is: 100
Total reward sum: 2299.0

No.of Successfully Trips is: 98
Total reward sum: 2310.0

No.of Successfully Trips is: 99
Total reward sum: 2420.5

Case 6:

self.alpha = 1.0
self.gamma = 0.75
self.epsilon = 0.4

if self.alpha > .65:
    self.alpha -= 0.01
    self.gamma += 0.02
    self.epsilon += 0.01

No.of Successfully Trips is: 99
Total reward sum: 2321.5

No.of Successfully Trips is: 100
Total reward sum: 2285.5

No.of Successfully Trips is: 100
Total reward sum: 2392.0


Case 7:

self.alpha = 1.0
self.gamma = 0.75
self.epsilon = 0.4

if self.alpha > .55:
    self.alpha -= 0.01
    self.gamma += 0.02
    self.epsilon += 0.01

No.of Successfully Trips is: 100
Total reward sum: 2366.5

No.of Successfully Trips is: 99
Total reward sum: 2280.5

No.of Successfully Trips is: 99
Total reward sum: 2367.5


How to use
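
A minimal sketch, assuming the analyser script in this commit is saved as log_analyser.py (the filename and paths below are assumptions; they are not shown on this page):

1. Capture the smartcab agent's console output into a log file, e.g. python smartcab/agent.py > smartcab/test_log100. The agent must print the expected_reward field described in the analyser's docstring.
2. Run the analyser on that log: python log_analyser.py smartcab/test_log100. With no argument the script falls back to the log file 'q_log02'.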