1
1
from __future__ import unicode_literals , print_function , division
2
2
import random
3
- from models .lstm_encoder import LSTMEncoder
4
- from models .lstm_decoder import LSTMDecoder
5
- from models .attention_decoder import AttentionDecoder
6
- from models .lstm_to_lstm import Seq2Seq
7
- from tokens_util import prepare_tokens , tensors_from_pair_tokens , plot_loss
3
+ from tokens_util import tensors_from_pair_tokens , plot_loss , tensors_from_pair_tokens_graph
8
4
9
5
import torch
10
6
import torch .nn as nn
13
9
import numpy as np
14
10
from metrics import compute_rouge_scores
15
11
import pickle
16
- import os
17
- import sys
18
12
19
- device = torch .device ("cuda" if torch .cuda .is_available () else "cpu" )
20
13
21
- # lang, pairs = prepare_tokens()
22
- # # test_pairs = pairs[-10000:]
23
- # # val_pairs = pairs[-20000:-10000]
24
- # # train_pairs = pairs[:-20000]
25
- # # pairs = pairs[:100]
26
- # train_pairs, val_pairs, test_pairs = np.split(pairs, [int(.8*len(pairs)), int(.9*len(pairs))])
27
- #
28
- # test_pairs = test_pairs
29
- # val_pairs = val_pairs
30
- # train_pairs = train_pairs
14
+ device = torch .device ("cuda" if torch .cuda .is_available () else "cpu" )
31
15
32
16
33
- def evaluate (seq2seq_model , eval_pairs , criterion , eval = 'val' ):
17
+ def evaluate (seq2seq_model , eval_pairs , criterion , eval = 'val' , graph = False ):
34
18
with torch .no_grad ():
35
19
loss = 0
36
20
f1 = 0
37
21
rouge_2 = 0
38
22
rouge_l = 0
39
23
for i in range (len (eval_pairs )):
40
- eval_pair = eval_pairs [i ]
41
- input_tensor = eval_pair [0 ]
42
- target_tensor = eval_pair [1 ]
24
+ if graph :
25
+ eval_pair = eval_pairs [i ]
26
+ input_tensor = eval_pair [0 ][0 ].to (device )
27
+ adj_tensor = eval_pair [0 ][1 ].to (device )
28
+ target_tensor = eval_pair [1 ].to (device )
29
+
30
+ output = seq2seq_model (sequence = input_tensor .view (- 1 ), adj = adj_tensor ,
31
+ target = target_tensor .view (- 1 ))
32
+ else :
33
+ eval_pair = eval_pairs [i ]
34
+ input_tensor = eval_pair [0 ]
35
+ target_tensor = eval_pair [1 ]
36
+
37
+ output = seq2seq_model (sequence = input_tensor .view (- 1 ), target = target_tensor .view (
38
+ - 1 ))
43
39
44
- output = seq2seq_model (input_tensor .view (- 1 ), target_tensor .view (- 1 ))
45
40
loss += criterion (output .view (- 1 , output .size (2 )), target_tensor .view (- 1 ))
46
41
pred = output .view (- 1 , output .size (2 )).argmax (1 ).cpu ().numpy ()
47
42
@@ -64,10 +59,15 @@ def evaluate(seq2seq_model, eval_pairs, criterion, eval='val'):
64
59
return loss , f1 , rouge_2 , rouge_l
65
60
66
61
67
- def train (input_tensor , target_tensor , seq2seq_model , optimizer , criterion ):
62
+ def train (input_tensor , target_tensor , seq2seq_model , optimizer , criterion , graph , adj_tensor = None ):
68
63
optimizer .zero_grad ()
69
64
70
- output = seq2seq_model (input_tensor .view (- 1 ), target_tensor .view (- 1 ))
65
+ if graph :
66
+ output = seq2seq_model (sequence = input_tensor .view (- 1 ), adj = adj_tensor ,
67
+ target = target_tensor .view (- 1 ))
68
+ else :
69
+ output = seq2seq_model (sequence = input_tensor .view (- 1 ), target = target_tensor .view (- 1 ))
70
+
71
71
loss = criterion (output .view (- 1 , output .size (2 )), target_tensor .view (- 1 ))
72
72
pred = output .view (- 1 , output .size (2 )).argmax (1 ).cpu ().numpy ()
73
73
@@ -79,7 +79,7 @@ def train(input_tensor, target_tensor, seq2seq_model, optimizer, criterion):
79
79
80
80
81
81
def train_iters (seq2seq_model , n_iters , pairs , print_every = 1000 , learning_rate = 0.001 ,
82
- model_dir = None , lang = None ):
82
+ model_dir = None , lang = None , graph = False ):
83
83
train_losses = []
84
84
val_losses = []
85
85
@@ -97,18 +97,35 @@ def train_iters(seq2seq_model, n_iters, pairs, print_every=1000, learning_rate=0
97
97
[int (.8 * len (pairs )), int (.9 * len (pairs ))])
98
98
99
99
optimizer = optim .Adam (seq2seq_model .parameters (), lr = learning_rate )
100
- training_pairs = [tensors_from_pair_tokens (random .choice (train_pairs ), lang )
101
- for i in range (n_iters )]
102
- val_tensor_pairs = [tensors_from_pair_tokens (val_pair , lang ) for val_pair in val_pairs ]
100
+
101
+ if graph :
102
+ training_pairs = [tensors_from_pair_tokens_graph (random .choice (train_pairs ), lang )
103
+ for i in range (n_iters )]
104
+ val_tensor_pairs = [tensors_from_pair_tokens_graph (val_pair , lang ) for val_pair in val_pairs ]
105
+ else :
106
+ training_pairs = [tensors_from_pair_tokens (random .choice (train_pairs ), lang )
107
+ for i in range (n_iters )]
108
+ val_tensor_pairs = [tensors_from_pair_tokens (val_pair , lang ) for val_pair in val_pairs ]
109
+
103
110
# test_tensor_pairs = [tensors_from_pair_tokens(test_pair, lang) for test_pair in test_pairs]
104
111
criterion = nn .NLLLoss ()
105
112
106
113
for iter in range (1 , n_iters + 1 ):
107
114
training_pair = training_pairs [iter - 1 ]
108
- input_tensor = training_pair [0 ]
109
- target_tensor = training_pair [1 ]
115
+ if graph :
116
+ input_tensor = training_pair [0 ][0 ].to (device )
117
+ adj_tensor = training_pair [0 ][1 ].to (device )
118
+ target_tensor = training_pair [1 ].to (device )
119
+
120
+ loss , pred = train (input_tensor , target_tensor , seq2seq_model , optimizer ,
121
+ criterion , adj_tensor = adj_tensor , graph = graph )
122
+ else :
123
+ input_tensor = training_pair [0 ]
124
+ target_tensor = training_pair [1 ]
125
+
126
+ loss , pred = train (input_tensor , target_tensor , seq2seq_model , optimizer , criterion ,
127
+ graph = graph )
110
128
111
- loss , pred = train (input_tensor , target_tensor , seq2seq_model , optimizer , criterion )
112
129
print_loss_total += loss
113
130
plot_loss_total += loss
114
131
@@ -138,7 +155,7 @@ def train_iters(seq2seq_model, n_iters, pairs, print_every=1000, learning_rate=0
138
155
139
156
train_loss = print_loss_avg
140
157
val_loss , val_f1 , val_rouge_2 , val_rouge_l = evaluate (seq2seq_model , val_tensor_pairs ,
141
- criterion )
158
+ criterion , graph = graph )
142
159
# test_loss, test_f1, test_rouge_2, test_rouge_l = evaluate(seq2seq_model,
143
160
# test_tensor_pairs,
144
161
# criterion, eval='test')
@@ -159,21 +176,3 @@ def train_iters(seq2seq_model, n_iters, pairs, print_every=1000, learning_rate=0
159
176
open (model_dir + 'res.pkl' , 'wb' ))
160
177
161
178
plot_loss (train_losses , val_losses , file_path = model_dir + 'loss.jpg' )
162
-
163
-
164
- # def main(model_name):
165
- # model_dir = '../results/{}/'.format(model_name)
166
- # if not os.path.exists(model_dir):
167
- # os.makedirs(model_dir)
168
- #
169
- # hidden_size = 256
170
- # encoder1 = LSTMEncoder(lang.n_words, hidden_size).to(device)
171
- # attn_decoder1 = LSTMDecoder(hidden_size, lang.n_words).to(device)
172
- # # attn_decoder1 = AttentionDecoder(hidden_size, lang.n_words).to(device)
173
- # lstm2lstm = Seq2Seq(encoder1, attn_decoder1, device)
174
- # train_iters(lstm2lstm, 500000, print_every=100, model_dir=model_dir)
175
- # # train_iters(lstm2lstm, 50, print_every=10, plot_every=1000)
176
- #
177
- #
178
- # if __name__ == "__main__":
179
- # main(sys.argv[1])
0 commit comments