-
Notifications
You must be signed in to change notification settings - Fork 1
/
wmd.py
executable file
·66 lines (31 loc) · 1.16 KB
/
wmd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""Print the Word Mover's Distance and a similarity score for two patients.

Usage: wmd.py <patient_a> <patient_b>

Each argument names a text file ``txtData/<patient>.txt``.  Both files are
lower-cased and split on whitespace, word vectors are loaded from
``vectors.txt`` (word2vec text format), and the distance between the two
token lists is reported together with ``1 / (1 + distance)``.

The code sticks to constructs that behave the same on Python 2 and 3
(single-argument ``print(...)``, ``%`` formatting).
"""
import logging
import os
import sys
from time import time

# Locations are relative to the current working directory, as in the
# original script.
DATA_DIR = "txtData/"
VECTORS_PATH = "vectors.txt"


def read_tokens(path):
    """Return the lower-cased, whitespace-separated tokens of the file at *path*.

    The file is opened in a ``with`` block so the handle is closed even if
    reading fails.
    """
    with open(path, "r") as handle:
        return handle.read().lower().split()


def distance_to_similarity(distance):
    """Map a non-negative distance to a similarity score, ``1 / (1 + distance)``.

    A distance of 0 gives 1.0; an infinite distance gives 0.0.  Float
    literals keep the division a true division for integer input on
    Python 2 as well.
    """
    return 1.0 / (1.0 + distance)


def main(argv=None):
    """Run the comparison for the two patient ids in *argv*.

    *argv* defaults to ``sys.argv``.  Returns the process exit status:
    0 on success, 2 when fewer than two patient ids were supplied.
    """
    if argv is None:
        argv = sys.argv
    # Validate before the expensive gensim import so a usage mistake fails
    # fast with a readable message instead of an IndexError.
    if len(argv) < 3:
        sys.stderr.write("usage: %s <patient_a> <patient_b>\n" % argv[0])
        return 2
    a = argv[1]
    b = argv[2]

    # Imported here rather than at module level so the usage check and the
    # helpers above work without loading gensim.
    from gensim.models import KeyedVectors

    # Initialize logging.
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')

    print('Calculating Similarity Score between Patient ' + a + ' and ' + 'Patient ' + b)
    sentence_1 = read_tokens(DATA_DIR + str(a) + ".txt")
    sentence_2 = read_tokens(DATA_DIR + str(b) + ".txt")

    start = time()
    model = KeyedVectors.load_word2vec_format(VECTORS_PATH, binary=False)
    print('Time taken to import vectors: %.2f seconds' % (time() - start))

    # The timer below covers both the normalization and the distance call.
    start = time()
    # Normalizes the vectors in place (replace=True discards the originals).
    # NOTE(review): newer gensim releases reportedly deprecate init_sims -
    # confirm against the installed version before upgrading.
    model.init_sims(replace=True)
    distance = model.wmdistance(sentence_1, sentence_2)  # Compute WMD as normal.
    print('Calculating similarity took %.2f seconds to run.' % (time() - start))

    print('Distance: ' + str(distance))
    print('Similarity: ' + str(distance_to_similarity(distance)))
    return 0


if __name__ == "__main__":
    sys.exit(main())