-
Notifications
You must be signed in to change notification settings - Fork 0
/
conv.py
75 lines (59 loc) · 1.87 KB
/
conv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
Adjustment Functions for BATS Dataset
"""
def identity(w: str) -> str:
    """Return ``w`` unchanged (no tag suffix is appended)."""
    return w


def noun(w: str) -> str:
    """Return ``w`` with the noun suffix ``-n`` appended."""
    return w + "-n"


def adj(w: str) -> str:
    """Return ``w`` with the adjective suffix ``-j`` appended."""
    return w + "-j"


def adv(w: str) -> str:
    """Return ``w`` with the adverb suffix ``-a`` appended."""
    return w + "-a"


def verb(w: str) -> str:
    """Return ``w`` with the verb suffix ``-v`` appended."""
    return w + "-v"


def noun_cap(w: str) -> str:
    """Return ``w`` capitalized and tagged with the noun suffix ``-n``.

    Note: ``str.capitalize`` upper-cases the first character and
    lower-cases all the others, so ``"new York"`` becomes ``"New york-n"``.
    """
    return noun(w.capitalize())


def adj_cap(w: str) -> str:
    """Return ``w`` capitalized and tagged with the adjective suffix ``-j``.

    Note: ``str.capitalize`` upper-cases the first character and
    lower-cases all the others.
    """
    return adj(w.capitalize())
################################################################################
# Maps each BATS category id to a (source_adjust, target_adjust) pair:
# the first function is applied to the left-hand word of a pair and the
# second to the right-hand word, as indicated by the "a -> b" comments.
bats_conf = {
    # ------------------------------------------------------------------
    # Encyclopedic
    # ------------------------------------------------------------------
    "e01": (noun_cap, noun_cap),  # capital -> country
    "e02": (noun_cap, noun_cap),  # country -> language
    "e03": (noun_cap, noun_cap),  # uk-city -> county
    "e04": (noun_cap, adj_cap),  # surname -> nationality
    "e05": (noun_cap, noun),  # surname -> occupation
    "e06": (noun, noun),  # animal -> young
    # Verbs do not work better than nouns for animal sounds.
    "e07": (noun, noun),  # animal -> sound
    "e08": (noun, noun),  # animal -> shelter
    # For colors, nouns work almost as well as adjectives.
    "e09": (noun, adj),  # thing -> color
    "e10": (noun, noun),  # male -> female
    # ------------------------------------------------------------------
    # Lexicographic
    #
    # Note: L07, L08 and L10 mix up POS tags, so these three are
    # manually edited (hence the identity adjustment).
    # ------------------------------------------------------------------
    "l01": (noun, noun),
    "l02": (noun, noun),
    "l03": (noun, noun),
    "l04": (noun, noun),
    "l05": (noun, noun),
    "l06": (noun, noun),
    "l07": (identity, identity),  # synonyms - intensity
    "l08": (identity, identity),  # synonyms - exact
    "l09": (adj, adj),  # antonyms - gradable
    "l10": (identity, identity),  # antonyms - binary
    # ------------------------------------------------------------------
    # Derivational
    # ------------------------------------------------------------------
    "d01": (noun, adj),  # noun + <less>
    "d02": (adj, adj),  # <un> + adj
    "d03": (adj, adv),  # adj + <ly>
    "d04": (adj, adj),  # <over> + adj
    "d05": (adj, noun),  # adj + <ness>
    "d06": (verb, verb),  # <re> + verb
    "d07": (verb, adj),  # verb + <able>
    "d08": (verb, noun),  # verb + <er>
    "d09": (verb, noun),  # verb + <tion>
    "d10": (verb, noun),  # verb + <ment>
    # ------------------------------------------------------------------
    # Note: the Inflectional part is not here as SkEThes uses lemmas.
    # ------------------------------------------------------------------
}