forked from chokkan/crfsuite
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbench_crfpp.py
executable file
·78 lines (63 loc) · 1.88 KB
/
bench_crfpp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
import sys
import os
import string
from bench import *
# Absolute paths of the CRF++ trainer and tagger executables that are
# substituted into the command lines built in the __main__ section.
CRFPP_LEARN='/home/okazaki/local/bin/crf_learn'
CRFPP_TEST='/home/okazaki/local/bin/crf_test'
# Path prefix (with trailing slash) prepended to every model/log file name.
OUTDIR='crfpp/'

# Extraction rules passed to analyze_log() together with the training log.
# NOTE(review): analyze_log and `last` are not defined in this file; they
# presumably come from `from bench import *`.  Each entry looks like
# (result key, regex, capture-group index, converter, reducer) -- confirm
# against bench.py before relying on this description.
training_patterns = (
    # Integer that follows "Number of features:".
    ('num_features', r'^Number of features:[ ]*(\d+)', 1, int, last),
    # Number that immediately follows "Done!".
    ('time', r'^Done!([\d.]+)', 1, float, last),
    # Integer from an "iter=<n>" prefix.
    ('iterations', r'^iter=(\d+)', 1, int, last),
    # Number from "time=<t>"; the only entry that uses min instead of last.
    ('update', r'time=([\d.]+)', 1, float, min),
    # Number from "obj=<v>".
    ('loss', r'obj=([\d.]+)', 1, float, last),
    )

# Extraction rules passed to analyze_log() together with the tagging log
# (same tuple layout as training_patterns).
tagging_patterns = (
    # Number that follows "Item accuracy: ".
    ('accuracy', r'^Item accuracy: ([\d.]+)', 1, float, last),
    )

# Maps a run name (used in output file names and as a key of the final result)
# to the option string substituted for $param in the crf_learn command line.
params = {
    'lbfgs': '-a CRF-L2',
    'mira': '-a MIRA',
    }
# NOTE(review): this guard compares against '_main__' (a single leading
# underscore), which __name__ does not equal when the file is run as a script,
# so the stdin-analysis shortcut below is effectively disabled.  It looks like
# a debugging hook that was switched off on purpose; the condition is kept
# unchanged so that it does not start consuming stdin during a normal run.
if __name__ == '_main__':
    print(analyze_log(sys.stdin, training_patterns))

# Script entry point: for every configuration in `params`, build the CRF++
# training and tagging command lines, echo them to stderr, copy the training
# log into a per-run text file, extract the metrics from the training and
# tagging logs, and finally print the collected results as a dict repr on
# stdout.
if __name__ == '__main__':
    fe = sys.stderr

    # Run name -> dict of extracted metrics (plus the 'logfile' path).
    R = {}
    # .items() instead of .iteritems() so the loop runs on Python 2 and 3.
    for name, param in params.items():
        model = OUTDIR + name + '.model'
        trlog = OUTDIR + name + '.tr.log'
        trtxt = LOGDIR + 'crfpp-' + name + '.txt'
        tglog = OUTDIR + name + '.tg.log'

        # Training command; its stdout is redirected into the training log.
        s = string.Template(
            '$crfpp_learn $param template.crfpp train.txt $model > $trlog'
            )
        cmd = s.substitute(
            crfpp_learn=CRFPP_LEARN,
            param=param,
            model=model,
            trlog=trlog
            )

        fe.write(cmd)
        fe.write('\n')
        # NOTE(review): execution is commented out, so the script relies on
        # log files that already exist under OUTDIR -- confirm this is
        # intentional before re-enabling.
        #os.system(cmd)

        # Publish the training log, prefixed with the command that produced
        # it.  Context managers close both handles deterministically instead
        # of leaving that to garbage collection.
        with open(trtxt, 'w') as fo:
            fo.write('$ %s\n' % cmd)
            with open(trlog, 'r') as fi:
                fo.write(fi.read())

        # Tagging command; crf_test output is piped through ./accuracy.py and
        # the result is redirected into the tagging log.
        s = string.Template(
            '$crfpp_test -m $model test.txt | ./accuracy.py > $tglog'
            )
        cmd = s.substitute(
            crfpp_test=CRFPP_TEST,
            model=model,
            tglog=tglog
            )

        fe.write(cmd)
        fe.write('\n')
        # NOTE(review): disabled for the same reason as the training command.
        #os.system(cmd)

        # Extract the metrics from both logs and merge them into one record.
        with open(trlog) as fi:
            D = analyze_log(fi, training_patterns)
        with open(tglog) as fi:
            D.update(analyze_log(fi, tagging_patterns))
        D['logfile'] = trtxt
        R[name] = D

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(repr(R))