synch notebook & master

amineremache · Feb 18, 2019 · 254ca1a · 254ca1a
1 parent 8688e52
commit 254ca1a
Show file tree

Hide file tree

Showing 7 changed files with 60 additions and 39 deletions.
diff --git a/bee.py b/bee.py
@@ -27,8 +27,8 @@ def localSearch(self):
                     if ((len(lista)==1) and (indice==i) and (i < self.data.nb_attribs-1)):
                         i+=1
                     self.solution.state[i]= (self.solution.state[i] + 1) % 2
-
                     quality = self.solution.get_accuracy(self.solution.get_state())
+
                     if (quality > best):
                         pos = i
                         best = quality
@@ -53,9 +53,9 @@ def localSearch(self):
     def ql_localSearch(self,maxIterIndex,flip):
 
         """The reason why we do this is to 
-        tune the exploitation/exploration trade-off """
-        iterations = round(maxIterIndex/self.locIterations) +1 
-        #iterations = 2*self.locIterations
+        explore at the beginning and 
+        exploit at the end to converge to the optimal solution"""
+        iterations = int(maxIterIndex/self.locIterations)+1 if int(maxIterIndex/self.locIterations)+1 <= self.locIterations else self.locIterations
         for itr in range(iterations):
 
           state = self.solution.get_state()
@@ -93,7 +93,7 @@ def ql_localSearch(self,maxIterIndex,flip):
           self.rl_return = self.data.ql.get_q_value(self.solution,action)
           self.fitness = acc_new_state
           self.solution = next_sol
-          print("Next state's acc : ",acc_new_state)
+          #print("Next state's acc : ",acc_new_state)
           #print("This is acc choosed : {0}".format(acc_new_state))
 
     def setSolution(self,solution):

diff --git a/fs_data.py b/fs_data.py
@@ -18,12 +18,12 @@ def __init__(self,typeOfAlgo,location,nbr_exec):
         self.ql = QLearning(len(self.df.columns),Solution.attributs_to_flip(len(self.df.columns)-1))
         self.fsd = FsProblem(self.typeOfAlgo,self.df,self.ql)
 
-        self.classifier_name = str(type(self.fsd.classifier)).strip('< > \' class ').split('.')[3]
-        path = './results/'+ self.dataset_name
+        self.classifier_name = str(type(self.fsd.classifier)).strip('< > \' class ').split('.')[3]]
+        path = "./results/" + self.dataset_name
         if not os.path.exists(path):
           os.makedirs(path + '/logs/')
           os.makedirs(path + '/sheets/')
-        self.instance_name = str(self.typeOfAlgo) + '-' + str(time.strftime("%m-%d-%Y_%H-%M-%S_", time.localtime()) + self.classifier_name)
+        self.instance_name = str(self.typeOfAlgo) + '-' + self.dataset_name + '_' +  str(time.strftime("%m-%d-%Y_%H-%M-%S_", time.localtime()) + self.classifier_name)
         log_filename = str(path + '/logs/'+ self.instance_name)
         if not os.path.exists(path):
           os.makedirs(path)
@@ -32,21 +32,23 @@ def __init__(self,typeOfAlgo,location,nbr_exec):
 
         print("[START] Dataset " + self.dataset_name + " description \n")
         print("Shape : " + str(self.df.shape) + "\n")
-        #print(self.df.describe())
+        print(self.df.describe())
         print("\n[END] Dataset " + self.dataset_name + " description\n")
         print("[START] Ressources specifications\n")
-        #!cat /proc/cpuinfo # Think of changing this when using Windows
+        os.exec('cat /proc/cpuinfo') # Think of changing this when using Windows
         print("[END] Ressources specifications\n")
 
 
         sheet_filename = str(path + '/sheets/'+ self.instance_name )
         self.workbook = xlsxwriter.Workbook(sheet_filename + '.xlsx')
 
         self.worksheet = self.workbook.add_worksheet(self.classifier_name)
-        self.worksheet.write(0,0,'Iteration')
-        self.worksheet.write(0,1,'Accuracy')
-        self.worksheet.write(0,2,'N_Features')
-        self.worksheet.write(0,3,'Time')
+        self.worksheet.write(0,0,"Iteration")
+        self.worksheet.write(0,1,"Accuracy")
+        self.worksheet.write(0,2,"N_Features")
+        self.worksheet.write(0,3,"Time")
+        self.worksheet.write(0,4,"Top_10%_features")
+        self.worksheet.write(0,5,"Size_sol_space")
 
     def run(self,flip,maxChance,nbrBees,maxIterations,locIterations):
         total_time = 0
@@ -64,6 +66,8 @@ def run(self,flip,maxChance,nbrBees,maxIterations,locIterations):
           self.worksheet.write(itr, 1, "{0:.2f}".format(best[0]))
           self.worksheet.write(itr, 2, best[1])
           self.worksheet.write(itr, 3, "{0:.3f}".format(t2-t1))
+          self.worksheet.write(itr, 4, "{0}".format(str([j[0] for j in [i for i in swarm.best_features()]])))
+          self.worksheet.write(itr, 5, len(Solution.solutions))
 
         print ("Total execution time of {0} executions \nfor dataset \"{1}\" is {2:.2f} s".format(self.nb_exec,self.dataset_name,total_time))
         self.workbook.close()
diff --git a/fs_problem.py b/fs_problem.py
@@ -44,5 +44,4 @@ def evaluate(self,solution):
         cv = ShuffleSplit(n_splits=10, test_size=0.1, random_state=0) # Cross validation function
         results = cross_val_score(self.classifier, X, Y, cv=cv,scoring='accuracy')
         #print("\n[Cross validation results]\n{0}".format(results))
-        return results.mean()
-
+        return results.mean()
diff --git a/main.py b/main.py
@@ -12,12 +12,12 @@
     # Params init
 
     typeOfAlgo = 0
-    nbr_exec = 1
+    nbr_exec = 5
     flip = 5
     maxChance = 3
     nbrBees = 10
-    maxIterations = 4
-    locIterations = 2
+    maxIterations = 10
+    locIterations = 10
 
     instance = FSData(typeOfAlgo,location,nbr_exec)
     instance.run(flip,maxChance,nbrBees,maxIterations,locIterations)
diff --git a/rl.py b/rl.py
@@ -56,7 +56,7 @@ def step(self,solution,actions,flip):
             max_val = self.get_max_q_value(solution,action_values)[0] # getting the max next q_value
             argmax_actions=[self.get_max_q_value(solution,action_values)[1]] # saving the action that maxmizes the reward
 
-            # There may be actions that has the same reward, so we add them to the argmax_avtions
+            # There may be actions that have the same reward, so we add them to the argmax_actions
             for ac in action_values : 
               ac_state = self.get_next_state(solution,ac)
               ac_state_q_val = self.get_q_value(solution,ac) + solution.get_accuracy(ac_state)

diff --git a/solution.py b/solution.py
@@ -3,6 +3,7 @@
 class Solution:
 
     solutions = {} 
+    best_sol = None
     tot_eval_time = 0
     sorting_time = 0
 
@@ -30,18 +31,25 @@ def set_accuracy(self,state):
         self.accuracy = Solution.solutions[Solution.str_sol(state)]
         t2 = time.time()
         Solution.tot_eval_time += t2-t1
+        if (Solution.best_sol == None) or (Solution.best_sol.get_accuracy(Solution.best_sol.get_state()) < self.accuracy):
+            Solution.best_sol = self
 
     def set_state(self,state): 
         self.state = copy.deepcopy(state)
 
     @staticmethod
     def get_best_sol():
-        t1 = time.time()
+      # This part has been replaced by the class variable "best_sol", because sorting was costing some execution time
+        """t1 = time.time()
         sorted_sols = sorted(Solution.solutions.items(), key=operator.itemgetter(1), reverse=True)
         t2 = time.time()
         #print("Best sol after sort : {0}".format(sorted_sols[0][1]))
         Solution.sorting_time += t2-t1
-        return sorted_sols[0][0] ,sorted_sols[0][1]
+        return sorted_sols[0][0] ,sorted_sols[0][1]"""
+
+        best_state = Solution.best_sol.get_state()
+        best_accuracy = Solution.best_sol.get_accuracy(best_state)
+        return Solution.str_sol(best_state), best_accuracy
 
 
     @staticmethod
@@ -58,7 +66,12 @@ def str_sol(mlist):
         for element in mlist:
             result += str(element)
         return result
-
+
+    @staticmethod
+    def sol_to_list(solution):
+      sol_list=[i for i, n in enumerate(solution) if n == 1]
+      return sol_list
+
     @staticmethod
     def list_sol(key):
         mlist = [ int(i) for i in key ]
@@ -78,13 +91,4 @@ def xor(x, y):
 
     @staticmethod
     def get_avg_time():
-        return Solution.tot_eval_time/len(Solution.solutions)
-
-    @staticmethod
-    def sol_to_list(solution):
-        sol_list=[i for i, n in enumerate(solution) if n == 1]
-        return sol_list
-
-    @staticmethod
-    def attributs_to_flip(nb_att):
-        return list(range(nb_att))
+      return Solution.tot_eval_time/len(Solution.solutions)
diff --git a/swarm.py b/swarm.py
@@ -1,5 +1,5 @@
 from bee import Bee
-import random, time
+import random, time, operator
 from solution import Solution
 
 class Swarm :
@@ -15,6 +15,7 @@ def __init__(self,problem,flip,maxChance,nbrBees,maxIterations,locIterations):
         self.refSolution = Bee(-1,self.data,self.locIterations,Bee.Rand(self.data.nb_attribs))
         self.bestSolution = self.refSolution
         self.tabou=[]
+        self.feature_count = { i:0 for i in range(self.data.nb_attribs) }
         Solution.solutions.clear()
 
     def searchArea(self):    
@@ -86,7 +87,7 @@ def distanceTabou(self,bee):
         for i in range(len(self.tabou)):
             cpt=0
             for j in range(self.data.nb_attribs):
-                if (bee.solution.state[j] != self.tabou[i].solution.state[j]) :
+                if (bee.solution.get_state()[j] != self.tabou[i].solution.get_state()[j]) :
                       cpt +=1
             if (cpt<=1) :
                 return 0
@@ -95,6 +96,7 @@ def distanceTabou(self,bee):
         return distanceMin
 
     def bestBeeQuality(self):
+
         distance = 0
         i=0
         pos=-1
@@ -133,7 +135,7 @@ def bso(self,typeOfAlgo,flip):
         i=1
         while(i<=self.maxIterations):
             t1 = time.time()
-            print("refSolution is : ", Solution.str_sol(self.refSolution.solution.get_state()))
+            #print("\nrefSolution is : ", Solution.str_sol(self.refSolution.solution.get_state()))
             self.tabou.append(self.refSolution)
             print("BSO iteration N° : ",i)
 
@@ -147,11 +149,12 @@ def bso(self,typeOfAlgo,flip):
               elif (typeOfAlgo == 1):
                 for episode in range(self.locIterations):
                   self.beeList[j].ql_localSearch(i,flip)
+              self.count_features(self.beeList[j].solution.get_state())
               print( "Fitness of bee " + str(j) + " is : " + str(self.beeList[j].fitness) + "\n")
             self.refSolution = self.selectRefSol()
-            i+=1
             t2 = time.time()
-            print("Time of iteration N°{0} : {1:.2f} s".format(i,t2-t1))
+            print("Time of iteration N°{0} : {1:.2f} s\n".format(i,t2-t1))
+            i+=1
 
         print("\n[BSO parameters used]\n")
         print("Type of algo : {0}".format(typeOfAlgo))
@@ -160,6 +163,7 @@ def bso(self,typeOfAlgo,flip):
         print("Nbr of Bees : {0}".format(self.nbrBees))
         print("Nbr of Max Iterations : {0}".format(self.maxIterations))
         print("Nbr of Loc Iterations : {0}\n".format(self.locIterations))
+        print("Must 10% used features : ",self.best_features())
         print("Best solution found : ",self.bestSolution.solution.get_state())
         print("Accuracy of found sol : {0:.2f} ".format(self.bestSolution.fitness*100))
         print("Number of features used : {0}".format(Solution.nbrUn(self.bestSolution.solution.get_state())))
@@ -168,5 +172,15 @@ def bso(self,typeOfAlgo,flip):
         print("Global optimum : {0}, {1:.2f}".format(Solution.get_best_sol()[0],Solution.get_best_sol()[1]*100))
         if (typeOfAlgo == 1):
           print("Return (Q-value) : ",self.bestSolution.rl_return)  
-          print("Total sorting time : {0:.2f} s".format(Solution.sorting_time))
+          #print("Total sorting time : {0:.2f} s".format(Solution.sorting_time))
         return self.bestSolution.fitness*100, Solution.nbrUn(self.bestSolution.solution.get_state())
+
+
+    def count_features(self,solution):
+        self.feature_count = {i:self.feature_count[i]+n for i, n in enumerate(solution)}
+
+    def best_features(self):
+        sorted_features = sorted(self.feature_count.items(), key=operator.itemgetter(1), reverse=True)
+        top_10 = round(0.1*self.data.nb_attribs)+1
+        best_features = sorted_features[:top_10]
+        return best_features