Cosmetic changes

dkaushik94 · May 13, 2018 · 4537d2d
1 parent d9271f5
commit 4537d2d
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 49 deletions.
diff --git a/JenkinsClustering.py b/JenkinsClustering.py
@@ -1,3 +1,8 @@
+import re
+import os
+import collections
+import traceback
+
 from StageExtractor import EntityExtractor
 from keras.preprocessing.text import text_to_word_sequence
 from shlex import shlex
@@ -6,10 +11,6 @@
 from sklearn.cluster import KMeans
 import pandas as pd
 from Statistics import Statistics
-import collections
-
-import re
-import os
 
 class JenkinsClustering:
     '''
@@ -65,8 +66,6 @@ def prepare_data_for_clustering(self):
             df['text'] = data
             df['file'] = files
 
-            # print(df)
-            # print(len(data))
 
             # Create a TF - IDF matrix for the data
             vectorizer = TfidfVectorizer(max_df=1.0, min_df=1,norm = None)
@@ -78,21 +77,4 @@ def prepare_data_for_clustering(self):
             print('Unable to prepare the data for clustering')
             return None, None
 
-
-
-# j = JenkinsClustering()
-
-# data, df = j.prepare_data_for_clustering()
-# estimator = KMeans(n_clusters=30, init = 'k-means++', max_iter = 1000)
-# estimator.fit(data)
-# print(estimator.labels_)
-
-# df['labels'] = estimator.labels_
-
-# for item in set(estimator.labels_):
-#     con_string = ''
-#     for i in range(len(df)):
-#         if df['labels'][i] == item:
-#             con_string += df['text'][i]
-#     Statistics.create_word_cloud(con_string, "Cluster %s" %(item), '/Users/sandeepjoshi/Documents/CS540/Course_Project/sandeep_joshi__debojit_kaushik_course_project/cluster_%s.png' %(item))
 
diff --git a/Jenkins_Analyzer.py b/Jenkins_Analyzer.py
@@ -1,12 +1,13 @@
-from JenkinsClustering import JenkinsClustering
-from sklearn.cluster import KMeans
-from Statistics import Statistics
 import json
 import traceback
 import logging
 import os
 import collections
 
+from JenkinsClustering import JenkinsClustering
+from sklearn.cluster import KMeans
+from Statistics import Statistics
+
 LOG_FORMAT = "%(levelname)s %(asctime)s - %(message)s"
 logging.basicConfig(filename = "info.log",
     level=os.environ.get("LOGLEVEL", "INFO"),
@@ -26,6 +27,7 @@ def do_clustering():
         print(collections.Counter(estimator.labels_))
         df['labels'] = estimator.labels_
         os.chdir('../')
+        # Gather data for a particular cluster and build a word cloud
         for item in set(estimator.labels_):
             con_string = ''
             for i in range(len(df)):
@@ -41,6 +43,7 @@ def do_clustering():
 
 def get_statistics():
     try:
+        # Call all the analytic functions from Statistics.py
         s = Statistics()
         path = "./Jenkinsfiles/"
         s.get_timeout_stats(path)
@@ -52,6 +55,7 @@ def get_statistics():
         s.get_post_block_correlation_statistics()
         s.get_parallel_block_statistics()
 
+        # Save the results to results.json
         with open('results.json','w',encoding='utf8') as f:
             f.write(json.dumps(s.statistics_dict, indent=4))
     except Exception:

diff --git a/Statistics.py b/Statistics.py
@@ -542,23 +542,3 @@ def get_post_block_correlation_statistics(self):
         except Exception:
             log.info(traceback.format_exc())
 
-
-
-# if __name__ == '__main__':
-#     try:
-#         s = Statistics()
-#         path = "./Jenkinsfiles/"
-#         s.get_timeout_stats(path)
-#         s.build_tool_stats(path)
-#         s.build_word_cloud_high_level(path)
-#         s.build_word_cloud_low_level(path)
-#         s.get_trigger_statistics()
-#         s.consolidate_post_block_statistics()
-#         s.get_post_block_correlation_statistics()
-#         s.get_parallel_block_statistics()
-#         with open('results.json','w',encoding='utf8') as f:
-#             f.write(json.dumps(s.statistics_dict, ensure_ascii=False))
-
-
-#     except Exception:
-#         print(traceback.format_exc())