-
Notifications
You must be signed in to change notification settings - Fork 0
/
complexityStatsGen.py
122 lines (99 loc) · 4.22 KB
/
complexityStatsGen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
import subprocess
import lizard
import json
import sys
from pygount import SourceAnalysis
def _csv_quote(text):
    """Return *text* wrapped in double quotes, with embedded quotes doubled.

    Doubling an embedded ``"`` keeps the cell parseable as CSV; text
    without quotes is emitted exactly as ``"text"``.
    """
    return '"' + str(text).replace('"', '""') + '"'


def generate_stats(file_path, problem_num, file_label, print_header, outfile):
    """Write the complexity statistics of one source file as a CSV row.

    Args:
        file_path: Path of the source file to analyse. Only its base name
            is written to the row.
        problem_num: Problem identifier, written as the second cell.
        file_label: Label for the origin of the file, written as the
            first cell.
        print_header: When true, a header line naming every column is
            written before the data row.
        outfile: Writable text stream that receives the output.

    The statistic columns are whatever ``calculate_stats`` returns, in the
    order of that dict; ``None`` values are written as empty cells.
    """
    file_stats = calculate_stats(file_path)

    if print_header:
        stat_names = ",".join(str(key) for key in file_stats)
        print("filesource,problem_num,filename," + stat_names, file=outfile)

    # The three descriptive cells are free text, so they are always quoted
    # (and escaped); the statistic cells are written bare.
    text_cells = ",".join(
        _csv_quote(cell)
        for cell in (file_label, problem_num, os.path.basename(file_path))
    )
    stat_cells = ",".join(
        "" if value is None else str(value) for value in file_stats.values()
    )
    print(text_cells + "," + stat_cells, file=outfile)
def _min_max_mean(values):
    """Return ``(min, max, mean)`` of *values*, or three ``None`` if empty."""
    if not values:
        return None, None, None
    return min(values), max(values), sum(values) / len(values)


def calculate_stats(file_path):
    """Collect size and complexity metrics for a single source file.

    Three tools are consulted in turn: pygount (line counts), lizard
    (token counts and per-function cyclomatic complexity) and the
    ``multimetric`` command-line program (its "overall" metrics, each
    stored under an ``mm_`` prefix).

    Args:
        file_path: Path of the source file to analyse.

    Returns:
        A dict mapping metric name to value. Per-function aggregates are
        ``None`` when lizard reports no functions. Insertion order is
        stable and is what callers use as column order.
    """
    stats = {}

    # pygount: line counts. "eleganceStats" is the second positional
    # argument (presumably pygount's group label -- confirm in its docs).
    line_counts = SourceAnalysis.from_file(file_path, "eleganceStats")
    stats["nloc_pygount"] = line_counts.source_count
    stats["comment_count"] = line_counts.documentation_count
    stats["empty_count"] = line_counts.empty_count

    # lizard: whole-file totals first, then per-function aggregates.
    analysis = lizard.analyze_file(file_path)
    stats["nloc_lizard"] = analysis.nloc
    stats["token_count"] = analysis.token_count
    stats["num_functions"] = len(analysis.function_list)

    aggregates = (
        ("func_cc", "cyclomatic_complexity"),
        ("token_count", "token_count"),
    )
    for suffix, attribute in aggregates:
        per_function = [getattr(fn, attribute) for fn in analysis.function_list]
        lowest, highest, mean = _min_max_mean(per_function)
        stats["min_" + suffix] = lowest
        stats["max_" + suffix] = highest
        stats["avg_" + suffix] = mean

    # TODO: decide whether top_nesting_level, fan_in, fan_out and
    # general_fan_out should be captured as well (meaning still unclear).

    # multimetric: run the external tool and parse the JSON it prints.
    # The exit status is not checked, so a failed run surfaces as a JSON
    # decoding error on its stdout.
    completed = subprocess.run(
        ["multimetric", file_path], capture_output=True, text=True
    )
    report = json.loads(completed.stdout)

    # Only one file is passed in, so the "overall" section is expected to
    # match the per-file section.
    for metric_name, metric_value in report["overall"].items():
        stats["mm_" + metric_name] = metric_value

    return stats
def calculate_stats_for_dirtree(file_path, outfile_name):
    """Generate a CSV of statistics for every solution listed in MANIFEST files.

    The tree under *file_path* is walked and every file named ``MANIFEST``
    is read. Blank lines are ignored. The first remaining line is used as
    a directory name (relative to *file_path*) and as the label of its
    solutions; every later line is split on single spaces, with field 0
    taken as the problem number and field 1 as the file name.

    Args:
        file_path: Root directory to search; also the base that manifest
            directory names are resolved against.
        outfile_name: Path of the CSV file to (over)write, or the literal
            ``'stdout'`` to write to standard output.

    The header line is emitted with the first solution only. A MANIFEST
    holding nothing but blank lines is skipped.
    """
    writes_to_file = outfile_name != 'stdout'
    outfile = open(outfile_name, "w") if writes_to_file else sys.stdout
    try:
        samples_dict = {}
        for root, _dirs, files in os.walk(file_path):
            if 'MANIFEST' not in files:
                continue
            with open(os.path.join(root, 'MANIFEST'), "r") as manifest:
                entries = [line for line in manifest.read().splitlines() if line]
            if not entries:
                # Nothing but blank lines: there is no directory name to
                # key on and no solutions to record.
                continue
            samples_dict[entries[0]] = [line.split(' ') for line in entries[1:]]

        is_first_row = True
        for dirname, solutions in samples_dict.items():
            for solution in solutions:
                problem_num = solution[0]
                filename = solution[1]
                # The directory comes from the manifest's first line, not
                # from the location where the MANIFEST file was found.
                full_filename = os.path.join(file_path, dirname, filename)
                generate_stats(full_filename, problem_num, dirname, is_first_row, outfile)
                is_first_row = False
    finally:
        # Close the file even when analysis of some solution fails;
        # sys.stdout is never closed.
        if writes_to_file:
            outfile.close()
if __name__ == '__main__':
    # Command-line entry point: expects a label and a file path, plus an
    # optional third argument "--header".
    num_args = len(sys.argv)
    if num_args < 3:
        print("Usage: python complexityStatsGen.py <label> <filename> [--header]")
        sys.exit(1)
    file_label = sys.argv[1]
    file_path = sys.argv[2]
    # Only an exact "--header" in third position requests the header line.
    print_header = (num_args >= 4 and sys.argv[3] == "--header")
    # NOTE(review): in the portion of the file reviewed here, nothing
    # consumes file_label, file_path or print_header after parsing --
    # confirm whether a call into the stats functions is missing.