forked from chokkan/crfsuite
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbench_mallet.py
executable file
·77 lines (62 loc) · 2.13 KB
/
bench_mallet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python
import sys
import os
import string
from bench import *
# Root of the MALLET installation.  Defaults to the location the benchmark
# was originally run from; set the MALLET_HOME environment variable to use a
# different install without editing this script.  An empty value falls back
# to the default.
MALLET_HOME = (os.environ.get('MALLET_HOME')
               or '/home/okazaki/install/mallet-2.0.6')

# Shell command prefix that launches MALLET's SimpleTagger.  The classpath
# holds the installation's `class` directory and `lib/mallet-deps.jar`.
MALLET = (
    'java -cp "%s/class:%s/lib/mallet-deps.jar" cc.mallet.fst.SimpleTagger'
    % (MALLET_HOME, MALLET_HOME)
)

# Path prefix (note the trailing slash) for model files and raw MALLET logs.
OUTDIR = 'mallet/'
# Statistics to pull out of a MALLET training log.
# NOTE(review): each entry looks like
#   (result key, line regex, capture group(s), converter, aggregator)
# but the entries are interpreted by analyze_log() from the bench module,
# which is not visible here -- confirm the exact field semantics there.
training_patterns = (
    # Count taken from the "Number of weights = N" lines.
    ('num_features',
     r'^Number of weights = (\d+)',
     1, int, last),

    # "<x>user <y>system" line; presumably emitted by the `time` prefix on
    # the training command.  Both captured groups are used, with `sum`.
    ('time',
     r'^([\d.]+)user ([\d.]+)system',
     (1, 2), float, sum),

    # One match per "finished one iteration" line, aggregated with `len`.
    ('iterations',
     r'^CRF finished one iteration of maximizer, i=(\d+)',
     1, int, len),

    # Disabled entry: the driver stores a constant 0. under 'update' instead.
    # ('update', r'^Seconds required for this iteration: ([\d.]+)', 1, float, min),

    # The capture group excludes the leading minus sign of the reported
    # log-likelihood, so only the digits after it are converted.
    ('loss',
     r'^getValue\(\) \(loglikelihood, optimizable by label likelihood\) = -([\d.]+)',
     1, float, last),
)
# Statistics to pull out of a MALLET tagging (test) log.
# NOTE(review): the entry layout appears to match training_patterns --
# (result key, line regex, capture group, converter, aggregator) -- as
# consumed by analyze_log() from the bench module; confirm there.
tagging_patterns = (
    # Value taken from the "Testing accuracy=<x>" lines.
    ('accuracy',
     r'^Testing accuracy=([\d.]+)',
     1, float, last),
)
# Parameter sets to benchmark, keyed by run name.  The key becomes part of
# the model/log file names built by the driver; the value is inserted
# verbatim into the MALLET training command line.
# 0.70710678118654746 is 1/sqrt(2).
# NOTE(review): presumably chosen so MALLET's Gaussian prior is comparable to
# the regularisation used by the other benchmarked tools -- confirm.
params = dict(
    default='--gaussian-variance 0.70710678118654746',
)
# Ad-hoc debugging hook: parse a training log from stdin and print the
# extracted statistics.  The guard compares against '_main__' (one leading
# underscore), which is not the value __name__ has when this file is run as
# a script, so this branch is effectively switched off.
# NOTE(review): looks like a deliberate off-switch rather than a typo --
# changing it to '__main__' would make this run in addition to the driver.
if __name__ == '_main__':
    stdin_stats = analyze_log(sys.stdin, training_patterns)
    print(stdin_stats)
if __name__ == '__main__':
    # Driver: for every parameter set, echo the MALLET training and tagging
    # commands to stderr, archive the training log next to the command that
    # produced it, and collect statistics from the training and tagging logs.
    # The per-run results are gathered in R and printed as a dict repr.
    fe = sys.stderr
    R = {}

    # The command templates do not depend on the parameter set, so they are
    # built once rather than on every iteration.
    train_template = string.Template(
        'time $mallet --train true $param --model-file $model train.mallet > $trlog 2>&1'
    )
    tag_template = string.Template(
        '$mallet --model-file $model --test lab test.mallet > $tglog 2>&1'
    )

    for name, param in params.items():
        # Per-run file names.  LOGDIR is not defined in this file;
        # presumably it comes from the star import of bench -- confirm.
        model = OUTDIR + name + '.model'
        trlog = OUTDIR + name + '.tr.log'
        trtxt = LOGDIR + 'mallet-' + name + '.txt'
        tglog = OUTDIR + name + '.tg.log'

        # Training command.
        cmd = train_template.substitute(
            mallet=MALLET,
            param=param,
            model=model,
            trlog=trlog
        )
        fe.write(cmd)
        fe.write('\n')
        # NOTE: execution is commented out, so the script only reports the
        # command and relies on a training log that already exists on disk.
        #os.system(cmd)

        # Archive the command line followed by the raw training log.  Both
        # handles are closed deterministically, even if reading fails.
        with open(trtxt, 'w') as fo:
            fo.write('$ %s\n' % cmd)
            with open(trlog, 'r') as fi:
                fo.write(fi.read())

        # Tagging command.
        cmd = tag_template.substitute(
            mallet=MALLET,
            model=model,
            tglog=tglog
        )
        fe.write(cmd)
        fe.write('\n')
        # NOTE: execution is commented out here as well (see above).
        #os.system(cmd)

        # Extract the statistics; the log handles are closed as soon as
        # analyze_log() returns.
        with open(trlog) as fi:
            D = analyze_log(fi, training_patterns)
        # The per-iteration 'update' pattern is disabled in
        # training_patterns, so a constant placeholder is recorded.
        D['update'] = 0.
        with open(tglog) as fi:
            D.update(analyze_log(fi, tagging_patterns))
        D['logfile'] = trtxt
        R[name] = D

    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(repr(R))