Skip to content

Commit

Permalink
Cosmetic changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
sandeepjoshi1910 committed May 13, 2018
1 parent d9271f5 commit 4537d2d
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 49 deletions.
28 changes: 5 additions & 23 deletions JenkinsClustering.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import re
import os
import collections
import traceback

from StageExtractor import EntityExtractor
from keras.preprocessing.text import text_to_word_sequence
from shlex import shlex
Expand All @@ -6,10 +11,6 @@
from sklearn.cluster import KMeans
import pandas as pd
from Statistics import Statistics
import collections

import re
import os

class JenkinsClustering:
'''
Expand Down Expand Up @@ -65,8 +66,6 @@ def prepare_data_for_clustering(self):
df['text'] = data
df['file'] = files

# print(df)
# print(len(data))

# Create a TF-IDF matrix for the data
vectorizer = TfidfVectorizer(max_df=1.0, min_df=1,norm = None)
Expand All @@ -78,21 +77,4 @@ def prepare_data_for_clustering(self):
print('Unable to prepare the data for clustering')
return None, None



# j = JenkinsClustering()

# data, df = j.prepare_data_for_clustering()
# estimator = KMeans(n_clusters=30, init = 'k-means++', max_iter = 1000)
# estimator.fit(data)
# print(estimator.labels_)

# df['labels'] = estimator.labels_

# for item in set(estimator.labels_):
# con_string = ''
# for i in range(len(df)):
# if df['labels'][i] == item:
# con_string += df['text'][i]
# Statistics.create_word_cloud(con_string, "Cluster %s" %(item), '/Users/sandeepjoshi/Documents/CS540/Course_Project/sandeep_joshi__debojit_kaushik_course_project/cluster_%s.png' %(item))

10 changes: 7 additions & 3 deletions Jenkins_Analyzer.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from JenkinsClustering import JenkinsClustering
from sklearn.cluster import KMeans
from Statistics import Statistics
import json
import traceback
import logging
import os
import collections

from JenkinsClustering import JenkinsClustering
from sklearn.cluster import KMeans
from Statistics import Statistics

LOG_FORMAT = "%(levelname)s %(asctime)s - %(message)s"
logging.basicConfig(filename = "info.log",
level=os.environ.get("LOGLEVEL", "INFO"),
Expand All @@ -26,6 +27,7 @@ def do_clustering():
print(collections.Counter(estimator.labels_))
df['labels'] = estimator.labels_
os.chdir('../')
# Gather data for a particular cluster and build a word cloud
for item in set(estimator.labels_):
con_string = ''
for i in range(len(df)):
Expand All @@ -41,6 +43,7 @@ def do_clustering():

def get_statistics():
try:
# Call all the analytic functions from Statistics.py
s = Statistics()
path = "./Jenkinsfiles/"
s.get_timeout_stats(path)
Expand All @@ -52,6 +55,7 @@ def get_statistics():
s.get_post_block_correlation_statistics()
s.get_parallel_block_statistics()

# Save the results to results.json
with open('results.json','w',encoding='utf8') as f:
f.write(json.dumps(s.statistics_dict, indent=4))
except Exception:
Expand Down
20 changes: 0 additions & 20 deletions Statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,23 +542,3 @@ def get_post_block_correlation_statistics(self):
except Exception:
log.info(traceback.format_exc())



# if __name__ == '__main__':
# try:
# s = Statistics()
# path = "./Jenkinsfiles/"
# s.get_timeout_stats(path)
# s.build_tool_stats(path)
# s.build_word_cloud_high_level(path)
# s.build_word_cloud_low_level(path)
# s.get_trigger_statistics()
# s.consolidate_post_block_statistics()
# s.get_post_block_correlation_statistics()
# s.get_parallel_block_statistics()
# with open('results.json','w',encoding='utf8') as f:
# f.write(json.dumps(s.statistics_dict, ensure_ascii=False))


# except Exception:
# print(traceback.format_exc())
Loading

0 comments on commit 4537d2d

Please sign in to comment.