forked from chokkan/crfsuite
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchunking.py
executable file
·49 lines (41 loc) · 1.15 KB
/
chunking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python
"""
A feature extractor for chunking.
Copyright 2010,2011 Naoaki Okazaki.
"""
# Character that separates the field values on each input line.
separator = ' '

# Names of the input fields, in column order, as one space-delimited string.
fields = 'w pos y'

# Attribute templates handed to crfutils.apply_templates().
# Every template is a tuple of (field name, integer) pairs; the integer is
# presumably a position offset relative to the current item -- confirm
# against crfutils.apply_templates.
# The table is assembled from n-gram groups, in this order:
#   1. 'w'   unigrams for -2 .. 2
#   2. 'w'   bigrams  (-1, 0) and (0, 1)
#   3. 'pos' unigrams for -2 .. 2
#   4. 'pos' bigrams  starting at -2 .. 1
#   5. 'pos' trigrams starting at -2 .. 0
templates = (
    tuple((('w', k),) for k in range(-2, 3))
    + tuple((('w', k), ('w', k + 1)) for k in range(-1, 1))
    + tuple((('pos', k),) for k in range(-2, 3))
    + tuple((('pos', k), ('pos', k + 1)) for k in range(-2, 2))
    + tuple(
        (('pos', k), ('pos', k + 1), ('pos', k + 2)) for k in range(-2, 1)
    )
)
import crfutils
def feature_extractor(X):
    """Populate the features (strictly speaking, attributes) of sequence X.

    The module-level ``templates`` are applied to X through
    ``crfutils.apply_templates``.  If X is non-empty, the marker
    ``'__BOS__'`` is then appended to the ``'F'`` list of the first item
    and ``'__EOS__'`` to the ``'F'`` list of the last item.

    X is modified in place; nothing is returned.
    """
    crfutils.apply_templates(X, templates)

    # An empty sequence has no first or last item to mark.
    if not X:
        return

    # For a one-item sequence both names refer to the same item, which
    # therefore receives the BOS marker followed by the EOS marker.
    first_item, last_item = X[0], X[-1]
    first_item['F'].append('__BOS__')   # beginning-of-sequence feature
    last_item['F'].append('__EOS__')    # end-of-sequence feature
if __name__ == '__main__':
    # Executed as a script: pass this module's extractor, field names and
    # field separator to the shared driver in crfutils.
    crfutils.main(feature_extractor, sep=separator, fields=fields)