forked from mfaruqui/eval-word-vectors
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_write.py
25 lines (21 loc) · 805 Bytes
/
read_write.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import sys
import gzip
import numpy
import math
from collections import Counter
from operator import itemgetter
''' Read all the word vectors and normalize them '''
def read_word_vectors(filename):
    ''' Read word vectors from a text file and L2-normalize each of them.

    Every non-blank line is expected to hold a word followed by its
    whitespace-separated vector components. Lines are lower-cased before
    parsing, so if two words differ only in case the later line overwrites
    the earlier one.

    Args:
        filename: path of the vector file; a name ending in '.gz' is read
            through gzip, anything else as plain text. Both are decoded
            as UTF-8.

    Returns:
        dict mapping each (lower-cased) word to a 1-D numpy float array
        scaled to (approximately) unit Euclidean length.

    Raises:
        ValueError: if a vector component cannot be parsed as a float.
    '''
    word_vecs = {}
    # Open both variants in text mode so dictionary keys are always str;
    # gzip.open defaults to binary mode and would otherwise yield bytes keys.
    opener = gzip.open if filename.endswith('.gz') else open
    with opener(filename, 'rt', encoding='utf-8') as file_object:
        for line in file_object:
            tokens = line.strip().lower().split()
            if not tokens:
                # Blank line (e.g. a trailing newline): nothing to parse.
                continue
            word = tokens[0]
            vector = numpy.array([float(val) for val in tokens[1:]], dtype=float)
            # Normalize; the small epsilon avoids division by zero for an
            # all-zero (or empty) vector.
            vector /= math.sqrt((vector ** 2).sum() + 1e-6)
            word_vecs[word] = vector
    sys.stderr.write("Vectors read from: "+filename+" \n")
    return word_vecs