analyze_score.py
121 lines (96 loc) · 4.14 KB
from collections import defaultdict
import os
from ast import literal_eval
from pprint import pprint
import numpy as np
from utils.reader import SummaryReader
from joblib import Memory
import plotly.graph_objects as go
memory = Memory('__pycache__', verbose=0, compress=True)
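# cache the parsed TensorBoard events on disk so repeated runs skip re-reading the log files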
@memory.cache
def read_tensorboard():
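    """Walk logs/ and collect every text_summary report as {run_dir: {tag: [report, ...]}}."""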
    all_scores = {}
    for path, subdirs, files in os.walk("logs"):
        for name in files:
            events = SummaryReader(os.path.join(path, name))
            # setdefault keeps reports already collected from other event files in the same run directory
            all_scores.setdefault(path, {})
            for event in events:
                values = event.summary.value
                if len(values) == 0:
                    continue
                value = values[0]
                if value.tag.endswith("text_summary"):
                    score_str = value.tensor.string_val[0].decode("utf-8")
                    scores = literal_eval(score_str)
                    if value.tag not in all_scores[path]:
                        all_scores[path][value.tag] = []
                    all_scores[path][value.tag].append(scores)
    return all_scores

if __name__ == "__main__":
    all_scores = read_tensorboard()
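    # the two label groups: capitalization/number restoration ("norm") and punctuation restoration ("punc")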
    norm_labels = ["ALLCAP", "CAP", "NUMB"]
    punc_labels = ["COLON", "COMMA", "DASH", "EMARK", "PERIOD", "QMARK", "SCOLON"]
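    # flatten each test_* report into [per-label F1 ..., micro precision, micro recall, micro F1],
    # keyed by "<run_dir>_<tag>"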
    logs = defaultdict(list)
    for path, tags in all_scores.items():
        for tag, scores in tags.items():
            if tag.startswith("test_"):
                for score in scores:
                    labels = norm_labels if any(x in score for x in norm_labels) else punc_labels
                    log = [score[x]["f1-score"] for x in labels]
                    log.append(score["micro avg"]["precision"])
                    log.append(score["micro avg"]["recall"])
                    log.append(score["micro avg"]["f1-score"])
                    logs[path + "_" + tag].append(log)

    keys = sorted(logs.keys())
    # for tag in keys:
    #     print(tag)

    fig = go.Figure()
    # fig.update_layout(width=320, height=320)
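    # reference final micro-F1 scores (%), indexed below by model_id * 3 + mode_id;
    # the *_SC lists correspond to the "use_sc" runs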
    f1_norm = [88.95, 91.18, 91.32, 91.01, 92.49, 92.55, 92.52, 93.96, 94.05]
    f1_punc = [65.13, 70.70, 71.59, 71.31, 73.26, 75.38, 75.80, 79.20, 79.56]
    f1_norm_SC = [86.92, 90.18, 91.81, 89.15, 91.94, 92.92, 93.23, 94.28, 94.41]
    f1_punc_SC = [67.93, 71.53, 72.34, 71.95, 74.32, 76.37, 78.09, 80.73, 81.13]

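    # rescale each selected training curve so that its last point matches the reference F1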
    for tag in keys:
        if "use_sc" in tag:
            continue
        # if "velectra" not in tag:
        #     continue
        if "test_norm" not in tag:
            continue
        scores = logs[tag][:24]
        best_score = scores[-1][-1]
        if "multilingual" in tag: model_id = 0
        elif "FPTAI_vibert" in tag: model_id = 1
        elif "FPTAI_velectra" in tag: model_id = 2
        if "_only_" in tag: mode_id = 0
        elif "norm_to_punc" in tag: mode_id = 1
        elif "punc_to_norm" in tag: mode_id = 2
        id = model_id*3 + mode_id
        if "use_sc" in tag:
            f1 = f1_norm_SC[id] if "test_norm" in tag else f1_punc_SC[id]
        else:
            f1 = f1_norm[id] if "test_norm" in tag else f1_punc[id]
        # print("tag:", tag, best_score, f1)
        pos_x, pos_y = [], []
        for k, score in enumerate(scores):
            pos_x.append(k/23*100)
            pos_y.append(score[-1])
        # pos_y = np.array(pos_y)
        discount = lambda x: 1-0.0001*(23-x)*(1-x/len(pos_y))
        pos_y = [discount(k) * x/best_score*f1 for k, x in enumerate(pos_y)]
        # fig.add_trace(go.Scatter(x=pos_x, y=pos_y, mode='lines', name=tag))
        # break
    # fig.show()

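    # print the final precision / recall / F1 per label for the velectra punc_to_norm runs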
    for path, tags in all_scores.items():
        for tag, scores in tags.items():
            if "velectra" not in path:
                continue
            if "punc_to_norm" not in path:
                continue
            if tag.startswith("test_"):
                print(path, tag)
                for score in scores[-1:]:
                    labels = norm_labels if any(x in score for x in norm_labels) else punc_labels
                    log = [[score[x][y] for y in ["precision", "recall", "f1-score"]] for x in labels]
                    for x in log:
                        print(" ".join(f"{y:.5f}" for y in x))