-
Notifications
You must be signed in to change notification settings - Fork 35
/
fast_features.py
69 lines (56 loc) · 2.75 KB
/
fast_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
'''
AAA lllllll lllllll iiii
A:::A l:::::l l:::::l i::::i
A:::::A l:::::l l:::::l iiii
A:::::::A l:::::l l:::::l
A:::::::::A l::::l l::::l iiiiiii eeeeeeeeeeee
A:::::A:::::A l::::l l::::l i:::::i ee::::::::::::ee
A:::::A A:::::A l::::l l::::l i::::i e::::::eeeee:::::ee
A:::::A A:::::A l::::l l::::l i::::i e::::::e e:::::e
A:::::A A:::::A l::::l l::::l i::::i e:::::::eeeee::::::e
A:::::AAAAAAAAA:::::A l::::l l::::l i::::i e:::::::::::::::::e
A:::::::::::::::::::::A l::::l l::::l i::::i e::::::eeeeeeeeeee
A:::::AAAAAAAAAAAAA:::::A l::::l l::::l i::::i e:::::::e
A:::::A A:::::A l::::::ll::::::li::::::ie::::::::e
A:::::A A:::::A l::::::ll::::::li::::::i e::::::::eeeeeeee
A:::::A A:::::A l::::::ll::::::li::::::i ee:::::::::::::e
AAAAAAA AAAAAAAlllllllllllllllliiiiiiii eeeeeeeeeeeeee
______ _ ___ ______ _____
| ___| | | / _ \ | ___ \_ _| _
| |_ ___ __ _| |_ _ _ _ __ ___ ___ / /_\ \| |_/ / | | (_)
| _/ _ \/ _` | __| | | | '__/ _ \/ __| | _ || __/ | |
| || __/ (_| | |_| |_| | | | __/\__ \ | | | || | _| |_ _
\_| \___|\__,_|\__|\__,_|_| \___||___/ \_| |_/\_| \___/ (_)
_____ _
|_ _| | |
| | _____ _| |_
| |/ _ \ \/ / __|
| | __/> <| |_
\_/\___/_/\_\\__|
Featurize folders of text files if default_text_features = ['fast_features']
This is the Facebook FastText model:
https://fasttext.cc/docs/en/english-vectors.html
'''
import numpy as np
def fast_featurize(sentence, model, size=300):
    """Average the word vectors of a sentence into one feature vector.

    The sentence is split on whitespace and each token is looked up with
    ``model[token]``. Tokens whose lookup fails are skipped, so they do not
    distort the average. The remaining vectors are averaged element-wise.

    Args:
        sentence: Text to featurize; tokenized with ``str.split()``.
        model: Any object supporting ``model[word]`` that returns a
            numeric vector (presumably a loaded FastText / word-vector
            model -- the concrete type is not visible from this function).
        size: Length of the all-zero vector returned when no token of the
            sentence could be looked up. Defaults to 300.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is a float64 NumPy
        array holding the mean vector (or ``size`` zeros when nothing
        matched). ``labels`` is a list of names ``'fast_feature_1'`` ...
        ``'fast_feature_N'``, one per element of ``features``.
    """
    vectors = []
    for word in sentence.split():
        try:
            vectors.append(model[word])
        except Exception:
            # Best-effort lookup: skip words the model cannot embed. The
            # exception type depends on the model implementation, so it is
            # kept broad, but no longer traps KeyboardInterrupt/SystemExit.
            continue

    if vectors:
        # Average in float64 regardless of the dtype the model returns.
        features = np.asarray(vectors, dtype=np.float64).mean(axis=0)
    else:
        # No usable word: return a zero vector instead of dividing by zero.
        features = np.zeros(size)

    labels = ['fast_feature_%s' % (i + 1) for i in range(len(features))]
    return features, labels