1
+ # coding: utf-8
2
+
3
+ # License: BSD 3 clause
4
+ # 2018
5
+ """
6
+ .. coauthor:: Haitham Selawi <haitham.selawi@mawdoo3.com>
7
+ .. coauthor:: Hussein Al-Natsheh <hussein.al-natsheh@cnrs.fr>
8
+
9
+ """
10
+ import keras .backend as K
11
+ from keras .models import load_model
12
+ from keras .models import model_from_yaml
13
+ from keras .preprocessing .sequence import pad_sequences
14
+ from keras .preprocessing .text import Tokenizer
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+ import pickle
19
+
20
+ from scipy .stats import pearsonr
21
+
22
# Location of the pre-trained MaLSTM artifacts (tokenizer, architecture, weights).
path = "../sensim/utils/LSTM/"

# Fitted Keras Tokenizer used to map raw text to integer sequences.
# NOTE(review): pickle.load on an untrusted file is unsafe; this assumes the
# artifact is trusted local data. Fixed: the file handle is now closed via
# `with` instead of being leaked by pickle.load(open(...)).
with open(path + 'tokenizer.p', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# The model architecture is stored as YAML, the weights separately in HDF5.
with open(path + 'model.yaml', 'r') as yaml_file:
    loaded_model_yaml = yaml_file.read()

# NOTE(review): model_from_yaml was removed in TF/Keras 2.6; on newer
# versions the JSON config (model_from_json) is the supported replacement.
loaded_model = model_from_yaml(loaded_model_yaml)
loaded_model.load_weights(path + 'model.h5')
# Compile so predict() can be used; loss/optimizer choice is irrelevant here
# because the model is only used for inference.
loaded_model.compile(loss='mse', optimizer='adam')

# Sequence length the network was trained with, read from its input layer.
maxlen = loaded_model.get_layer(index=0).input_shape[1]
35
+
36
def mlstm_transformer(X):
    """Transform pairs of sentences into MaLSTM embedding arrays.

    Parameters
    ----------
    X : iterable of sentence pairs
        Each element is a pair (or list) of raw-text sentences.

    Returns
    -------
    numpy.ndarray of dtype object
        One entry per input pair, each holding the model's prediction
        for the tokenized, padded sentences of that pair.
    """
    # dtype=object: each cell stores a whole prediction array for one pair.
    # Fixed: np.object was removed in NumPy 1.24; builtin `object` is the
    # documented equivalent.
    Xt = np.zeros(len(X), dtype=object)
    for i, x in enumerate(X):
        xt = tokenizer.texts_to_sequences(x)
        xt = pad_sequences(xt, maxlen=maxlen)
        Xt[i] = loaded_model.predict(xt)
    return Xt
46
+
47
# Element-wise counterpart of mlstm_transformer: embeds one sentence pair.
def mlstm_element_transformer(x):
    """Return the model's prediction for a single pair of sentences."""
    sequences = tokenizer.texts_to_sequences(x)
    padded = pad_sequences(sequences, maxlen=maxlen)
    return loaded_model.predict(padded)
53
+
54
def estimate(X):
    """Estimate an STS similarity score for each sentence pair in X.

    Scores are computed as 5 * exp(-d) where d is the Manhattan (L1)
    distance between the two sentence embeddings of a pair.
    """
    embeddings = mlstm_transformer(X)
    scores = np.zeros(len(X), dtype=np.float32)
    for i, pair in enumerate(embeddings):
        distance = np.linalg.norm(pair[0] - pair[1], ord=1)
        scores[i] = 5 * np.exp(-distance)
    return scores
60
+
61
def read_tsv(path_to_file):
    """Read a tab-separated file into a DataFrame.

    Columns are named 'column_0', 'column_1', ... in order of appearance,
    and every cell is stored as a stripped string.

    Parameters
    ----------
    path_to_file : str
        Path to the TSV file.

    Returns
    -------
    pandas.DataFrame
    """
    df = pd.DataFrame()
    with open(path_to_file) as infile:
        for line_nb, line in enumerate(infile):
            # Fixed: split on a plain tab; the original split on '\t ' (tab
            # plus space), which silently misparses standard TSV rows.
            for num, cell in enumerate(line.split('\t')):
                # Fixed: DataFrame.set_value was removed in pandas 1.0;
                # .at is the supported scalar setter.
                df.at[line_nb, 'column_{}'.format(num)] = cell.strip()
    return df
70
+
71
def df_2_dset(dframe, sent1_col="Sent1", sent2_col="Sent2", score_col="Score"):
    """Split a DataFrame into a sentence-pair matrix X and a score vector y.

    Parameters
    ----------
    dframe : pandas.DataFrame
    sent1_col, sent2_col : str
        Columns holding the two sentences of each pair.
    score_col : str
        Column holding the gold similarity score.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        X has shape (n, 2); y has shape (n,).
    """
    # Fixed: DataFrame.as_matrix was removed in pandas 1.0; selecting the
    # columns and calling to_numpy() is the supported replacement.
    X = dframe[[sent1_col, sent2_col]].to_numpy()
    y = dframe[score_col].values
    return X, y
75
+
76
def load_sts_benchmark_dataset(dframe_file):
    """Load an STS-benchmark TSV file and return (sentence pairs, scores)."""
    frame = read_tsv(dframe_file)
    # In the benchmark layout, column 4 holds the gold score and
    # columns 5 and 6 hold the two sentences of each pair.
    frame["Score"] = np.array(frame['column_4'], dtype=np.float32)
    return df_2_dset(frame, sent1_col="column_5", sent2_col="column_6")
81
+
82
def sts_score(X, y, decimals=2):
    """Pearson correlation between estimated and gold STS scores.

    Parameters
    ----------
    X : sequence of sentence pairs
        Input for `estimate`.
    y : array-like
        Gold similarity scores.
    decimals : int or None, default 2
        Round estimates to this many decimals; None disables rounding.

    Returns
    -------
    float
        Pearson correlation coefficient between estimates and gold scores.

    Side effects
    ------------
    Pickles the clipped (and possibly rounded) estimates to 'y_est.p'.
    """
    y_est = estimate(X)
    # Clamp estimates to the valid STS range [0, 5] in a single call
    # (replaces two np.where-based in-place assignments).
    y_est = np.clip(y_est, 0, 5)
    if decimals is not None:
        y_est = np.round(y_est, decimals=decimals)
    # Use a context manager so the dump file is closed deterministically.
    with open("y_est.p", "wb") as out:
        pickle.dump(y_est, out)
    print("y_est is pickled at y_est.p")
    return pearsonr(y_est, y)[0]
91
+
92
# Evaluate on the STS-benchmark test split and persist the score.
X_test, y_test = load_sts_benchmark_dataset(path + 'sts-test.csv')
score = sts_score(X_test, y_test)  # pass decimals=None to skip rounding
# Fixed: `print score` is Python 2 syntax and a SyntaxError on Python 3;
# the rest of this file already uses print() calls.
print(score)
with open("score.p", "wb") as out:
    pickle.dump(score, out)
96
+
97
+
98
+ """
99
+ #Example
100
+ # from the test set : 3.5 resulted as 3.62
101
+ line1_1 = 'A man is riding an electric bicycle.'
102
+ line1_2 = 'A man is riding a bicycle.'
103
+
104
+ # from the test set : 2.0, resulted as 2.31
105
+ line2_1 = 'A man is slicing a bun.'
106
+ line2_2 = 'A man is slicing a tomato.'
107
+
108
+ X = [[line1_1, line1_2],
109
+ [line2_1, line2_2]
110
+ ]
111
+
112
+
113
+ Xt = mlstm_transformer(X)
114
+
115
+ for xt in Xt:
116
+ print(5*np.exp(-np.linalg.norm(xt[0] - xt[1], ord = 1)))
117
+ """
0 commit comments