forked from dmis-lab/biobert
-
Notifications
You must be signed in to change notification settings - Fork 1
/
eval_yesno.py
90 lines (74 loc) · 2.43 KB
/
eval_yesno.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import json
import os
import argparse
import random
from tqdm import tqdm
def evaluate(args):
with open(args.na_prob_file, 'r') as fp:
na_probs = json.load(fp)
# Make official json outfile
out_json = {}
for qid in na_probs:
if na_probs[qid] > args.threshold:
out_json[qid] = 'no'
else:
out_json[qid] = 'yes'
with open(args.out_path, 'w') as fp:
json.dump(out_json, fp)
print('Official evaluation file dumped as {}'.format(args.out_path))
# If gold standard is not given, exit.
if not os.path.exists(args.eval_file):
return
# Eval with gold standard
with open(args.eval_file, 'r') as fp:
data = json.load(fp)['data']
qid2ans = {}
for article in data:
for paragraph in article['paragraphs']:
for qa in paragraph['qas']:
if 'answers' in qa:
qid2ans[qa['id']] = qa['answers']
# Yes/No F1
total = 0
correct = 0
yes = {'tp': 0, 'fp': 0, 'fn':0, 'tn':0}
no = {'tp': 0, 'fp': 0, 'fn':0, 'tn':0}
for qid, gt in qid2ans.items():
total += 1
correct += int(gt == out_json[qid])
if out_json[qid] == 'yes':
if out_json[qid] == gt:
yes['tp'] += 1
no['tn'] += 1
else:
yes['fp'] += 1
no['fn'] += 1
else:
if out_json[qid] == gt:
yes['tn'] += 1
no['tp'] += 1
else:
yes['fn'] += 1
no['fp'] += 1
yes_pr = yes['tp'] / (yes['tp'] + yes['fp'] + 1e-9)
yes_re = yes['tp'] / (yes['tp'] + yes['fn'] + 1e-9)
yes_f1 = 2 * yes_pr * yes_re / (yes_pr + yes_re + 1e-9)
no_pr = no['tp'] / (no['tp'] + no['fp'] + 1e-9)
no_re = no['tp'] / (no['tp'] + no['fn'] + 1e-9)
no_f1 = 2 * no_pr * no_re / (no_pr + no_re + 1e-9)
print('Accuracy: {}'.format(correct/total))
print('Yes F1: {}'.format(yes_f1))
print('No F1: {}'.format(no_f1))
print('Macro F1: {}'.format((yes_f1+no_f1)/2))
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('na_prob_file')
parser.add_argument('out_path')
parser.add_argument('--eval_file', type=str, default='')
parser.add_argument('--threshold', type=float, default=0.0)
return parser.parse_args()
def main():
args = get_args()
evaluate(args)
if __name__ == '__main__':
main()