forked from chokkan/crfsuite
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbench_crfsgd.py
executable file
·76 lines (61 loc) · 1.85 KB
/
bench_crfsgd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python
import sys
import os
import string
from bench import *
# Path to the crfsgd executable used to build the benchmark command lines.
# The historical default points into the original author's home directory, so
# it can be overridden through the CRFSGD environment variable on other hosts.
CRFSGD = os.environ.get('CRFSGD', '/home/okazaki/install/sgd-1.3/crf/crfsgd')

# Prefix (relative directory, trailing slash included) under which the model
# file and the raw training/tagging logs of each run are expected.
OUTDIR = 'crfsgd/'
# Metrics extracted from the crfsgd training log.  Every row is passed to
# analyze_log(); the fields are presumably (metric name, regex, capture-group
# index, converter, reducer) -- TODO confirm against bench.py.  `last` and
# `diffmin` are not defined in this file; presumably they come from the
# star-import of bench.
training_patterns = (
    ('num_features', r'features: (\d+)', 1, int, last),
    ('time', r'^Done! ([\d.]+)', 1, float, last),
    ('iterations', r'^\[Epoch (\d+)\]', 1, int, last),
    (
        'update',
        r'^\[Epoch \d+\][^a-z]+wnorm:[^a-z]+total time: ([\d.]+) seconds$',
        1,
        float,
        diffmin,
    ),
    ('loss', r'loss: ([\d.]+)', 1, float, last),
)

# Metrics extracted from the tagging log (the output of ./accuracy.py); same
# row layout as training_patterns.
tagging_patterns = (
    ('accuracy', r'^Item accuracy: ([\d.]+)', 1, float, last),
)
# Named crfsgd option strings; each entry produces one benchmark run whose
# output files are named after the key.
params = {
    'default': "-f 1 -r 100 -e ''",
}
if __name__ == '__main__':
    # Benchmark driver: for every entry in `params`, echo the training and
    # tagging command lines to stderr, copy the training log (prefixed with
    # the command that produced it) into LOGDIR, and collect the metrics
    # scraped from both logs into R, which is printed as a repr() at the end.
    # LOGDIR and analyze_log are not defined in this file; presumably they
    # come from the star-import of bench.
    fe = sys.stderr

    # The command templates do not depend on the loop variables, so they are
    # built once up front.
    train_template = string.Template(
        '$crfsgd $param $model template.crfpp train.txt > $trlog'
    )
    tag_template = string.Template(
        '$crfsgd -t $model test.txt | ./accuracy.py > $tglog'
    )

    R = {}
    for name, param in params.items():
        model = OUTDIR + name + '.model'
        trlog = OUTDIR + name + '.tr.log'
        trtxt = LOGDIR + 'crfsgd-' + name + '.txt'
        tglog = OUTDIR + name + '.tg.log'

        # Training command.
        cmd = train_template.substitute(
            crfsgd=CRFSGD,
            param=param,
            model=model,
            trlog=trlog
        )
        fe.write(cmd)
        fe.write('\n')
        # NOTE(review): execution is commented out, so this script only
        # re-analyzes logs that already exist under OUTDIR -- confirm whether
        # that is intentional before re-enabling.
        #os.system(cmd)

        # Publish the training log, prefixed with the command line.  Both
        # handles are closed deterministically so the copy is flushed before
        # the next step runs.
        with open(trtxt, 'w') as fo:
            fo.write('$ %s\n' % cmd)
            with open(trlog, 'r') as fi:
                fo.write(fi.read())

        # Tagging command.
        cmd = tag_template.substitute(
            crfsgd=CRFSGD,
            model=model,
            tglog=tglog
        )
        fe.write(cmd)
        fe.write('\n')
        # NOTE(review): disabled like the training command above.
        #os.system(cmd)

        # Scrape the metrics from both logs and remember where the published
        # training log lives.
        with open(trlog) as fi:
            D = analyze_log(fi, training_patterns)
        with open(tglog) as fi:
            D.update(analyze_log(fi, tagging_patterns))
        D['logfile'] = trtxt
        R[name] = D

    # Parenthesised so the line behaves the same as a Python 2 print statement
    # and as a Python 3 function call.
    print(repr(R))

# NOTE(review): this guard compares against '_main__' (one leading underscore),
# which does not match '__main__', so the body does not run when the file is
# executed as a script.  It looks like a parked debugging hook that analyzes a
# training log fed on stdin; left disabled because enabling it would make the
# script block on stdin after the benchmark above -- confirm intent.
if __name__ == '_main__':
    print(analyze_log(sys.stdin, training_patterns))