-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_lstm_crf.py
179 lines (154 loc) · 5.94 KB
/
run_lstm_crf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# -*- coding: utf-8 -*-
"""
Author: bin zhou
Date: 2020-05-28
"""
import os
import logging
import logging.config
import yaml
import torch
import torch.optim as optim
import time
from constants import ROOT_PATH
from model.lstm_crf import BiLstmCrf
from read_data import Data
from utils.functions import batchify_sequence_labeling_with_label, predict_check, evalute_sequence_labeling
# Module-level logger named after this module; used by the training code below.
logger = logging.getLogger(__name__)
# Load the logging setup from <ROOT_PATH>/logging.conf. Passing
# disable_existing_loggers=False keeps loggers created before this call
# (including the one above) enabled.
logging.config.fileConfig(os.path.join(ROOT_PATH, 'logging.conf'), disable_existing_loggers=False)
# Location of the YAML configuration file that Run.read_configs() parses.
yml_path = os.path.join(ROOT_PATH, 'conf/model.yaml')
def use_delete(if_del=True):
    """Build a decorator that deletes existing model files before each call.

    When ``if_del`` is true, every call of the decorated function first
    removes the regular files located directly in
    ``<ROOT_PATH>/saved_models/bilstm_crf`` whose names do not start with a
    dot, and then runs the function. When ``if_del`` is false the function
    is simply called and the directory is not touched.

    Args:
        if_del: Whether to delete the existing model files before each call.

    Returns:
        A decorator. The wrapped callable forwards all positional and keyword
        arguments and returns whatever the decorated function returns.
    """
    import functools  # function-scope import keeps this block self-contained

    def remove_exist_file(func):
        # Preserve the decorated function's __name__/__doc__ for logs and tooling.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if if_del:
                model_path = os.path.join(ROOT_PATH, 'saved_models/bilstm_crf')
                # A missing directory means there is nothing to clean up yet.
                if os.path.isdir(model_path):
                    for entry in os.listdir(model_path):
                        # Keep hidden files such as .gitkeep.
                        if entry.startswith('.'):
                            continue
                        name = os.path.join(model_path, entry)
                        # os.remove() raises on directories, so only delete files.
                        if os.path.isfile(name):
                            os.remove(name)
            return func(*args, **kwargs)
        return wrapper
    return remove_exist_file
class Run(object):
    """Training driver for the BiLSTM-CRF sequence labelling model.

    An instance holds the flat configuration dict (normally built by
    ``read_configs``) and exposes ``train``, which runs the epoch loop,
    evaluates on the "dev" and "test" splits after every epoch, and saves the
    model's state dict whenever the dev ``f`` value improves.
    """

    def __init__(self, configs):
        """Store the configuration and select the encoder.

        Args:
            configs: Dict of settings. ``train`` reads the keys ``lr``,
                ``momentum``, ``l2``, ``gpu``, ``epoch``, ``lr_decay``,
                ``batch_size``, ``model_path`` and ``require_improvement``.
        """
        self.configs = configs
        self.encoder_type = 'bilstm_crf'

    @staticmethod
    def _accuracy(right_token, whole_token):
        """Return right_token / whole_token as a float, or 0.0 when whole_token is zero."""
        return (right_token + 0.) / whole_token if whole_token else 0.

    @use_delete(True)
    def train(self, data):
        """Train the model, evaluating and checkpointing after every epoch.

        The ``@use_delete(True)`` decorator removes the existing files under
        ``saved_models/bilstm_crf`` before this method body runs.

        Args:
            data: Dataset object. This method reads ``data.train_ids`` and
                ``data.train_texts`` and hands ``data`` to the model
                constructor and to ``evalute_sequence_labeling``.

        Returns:
            None. Whenever the dev ``f`` value beats the best one seen so far,
            the state dict is written to
            ``configs['model_path'] + '.<epoch>.model'``.

        Raises:
            SystemExit: With status 1 when the accumulated sample loss exceeds
                1e8 or is NaN at a progress checkpoint.
        """
        if self.encoder_type == 'bilstm_crf':
            model = BiLstmCrf(data, self.configs)
        else:
            logger.info('No model is selected!')
            return
        # Move the model to the GPU *before* constructing the optimizer, as the
        # torch.optim documentation advises, so the optimizer is built over the
        # parameters in their final location.
        if self.configs['gpu']:
            model = model.cuda()
        optimizer = optim.SGD(
            model.parameters(), lr=self.configs['lr'], momentum=self.configs['momentum'],
            weight_decay=self.configs['l2'])
        best_dev = -10
        last_improved = 0
        for idx in range(self.configs['epoch']):
            epoch_start = time.time()
            temp_start = epoch_start
            logger.info("Epoch: %s/%s" % (idx, self.configs['epoch']))
            optimizer = self.lr_decay(optimizer, idx, self.configs['lr_decay'], self.configs['lr'])
            sample_loss = 0  # loss accumulated since the last progress log
            total_loss = 0  # loss accumulated over the whole epoch
            right_token = 0
            whole_token = 0
            logger.info('first input word list: %s, %s' % (data.train_texts[0][1], data.train_ids[0][1]))
            model.train()
            model.zero_grad()
            batch_size = self.configs['batch_size']
            train_num = len(data.train_ids)
            logger.info('batch_size: %s, train_num: %s' % (batch_size, train_num))
            end = 0  # stays defined for the summary log even if there are no batches
            # Stepping by batch_size never yields an empty trailing batch.
            for start in range(0, train_num, batch_size):
                end = min(start + batch_size, train_num)
                instance = data.train_ids[start:end]
                batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, \
                    batch_charrecover, batch_label, mask = batchify_sequence_labeling_with_label(
                        instance, self.configs['gpu'], if_train=True)
                loss, tag_seq = model(batch_word, batch_wordlen, batch_wordrecover, mask, batch_label)
                right, whole = predict_check(tag_seq, batch_label, mask)
                right_token += right
                whole_token += whole
                batch_loss = loss.item()
                sample_loss += batch_loss
                total_loss += batch_loss
                # Progress report each time the processed-instance count hits a multiple of 6400.
                if end % 6400 == 0:
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    logger.info("Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (
                        end, temp_cost, sample_loss, right_token, whole_token,
                        self._accuracy(right_token, whole_token)))
                    if sample_loss > 1e8 or str(sample_loss) == "nan":
                        print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                        # SystemExit instead of the site-provided exit() helper,
                        # which is not guaranteed to exist in every interpreter.
                        raise SystemExit(1)
                    # sample_loss is reset after every progress report
                    sample_loss = 0
                loss.backward()
                optimizer.step()
                model.zero_grad()
            temp_time = time.time()
            temp_cost = temp_time - temp_start
            logger.info("Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" % (
                end, temp_cost, sample_loss, right_token, whole_token,
                self._accuracy(right_token, whole_token)))
            epoch_finish = time.time()
            epoch_cost = epoch_finish - epoch_start
            logger.info("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s, total loss: %s" % (
                idx, epoch_cost, train_num / epoch_cost, total_loss))
            speed, acc, p, r, f, _, _ = evalute_sequence_labeling(data, model, "dev", self.configs)
            dev_finish = time.time()
            dev_cost = dev_finish - epoch_finish
            current_score = f
            logger.info("Epoch: %s, Loss: %s, Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
                idx, total_loss, dev_cost, speed, acc, p, r, f))
            # Checkpoint only when the dev score improves.
            if current_score > best_dev:
                model_name = self.configs['model_path'] + '.' + str(idx) + '.model'
                torch.save(model.state_dict(), model_name)
                best_dev = current_score
                last_improved = idx
            speed, acc, p, r, f, _, _ = evalute_sequence_labeling(data, model, "test", self.configs)
            test_finish = time.time()
            test_cost = test_finish - dev_finish
            logger.info("Epoch: %s, Loss: %s, Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f" % (
                idx, total_loss, test_cost, speed, acc, p, r, f))
            # Early stopping: give up once the dev score has not improved for
            # more than configs['require_improvement'] epochs.
            if idx - last_improved > self.configs['require_improvement']:
                logger.info('No optimization for %s epoch, auto-stopping' % self.configs['require_improvement'])
                break

    @staticmethod
    def lr_decay(optimzer, epoch, decay_rate, init_lr):
        """Set the optimizer's learning rate for the given epoch.

        The rate is ``init_lr / (1 + decay_rate * epoch)`` and is written to
        every parameter group.

        Args:
            optimzer: Optimizer whose ``param_groups`` are updated in place.
            epoch: Zero-based epoch index.
            decay_rate: Decay factor applied per epoch.
            init_lr: Learning rate used at epoch 0.

        Returns:
            The same optimizer object, for call-site convenience.
        """
        lr = init_lr / (1 + decay_rate * epoch)
        # Use the module logger, like the rest of this file, not the root logger.
        logger.info("Learning rate is set as: %s", lr)
        for param_group in optimzer.param_groups:
            param_group['lr'] = lr
        return optimzer

    @classmethod
    def read_configs(cls):
        """Build a ``Run`` from the YAML file at ``yml_path``.

        Adds the keys ``device``, ``gpu`` and ``map_location`` derived from
        CUDA availability, then copies the hyper-parameters stored under
        ``configs['model'][configs['model_num']]`` to the top level and drops
        the ``model`` entry.

        Returns:
            A new instance of ``cls`` holding the flattened configuration.
        """
        # YAML streams are Unicode; do not depend on the platform's locale encoding.
        with open(yml_path, 'r', encoding='utf-8') as rf:
            # NOTE(review): FullLoader is acceptable for this local, trusted
            # config file; do not point this at untrusted input.
            configs = yaml.load(rf, Loader=yaml.FullLoader)
        # Basic device properties
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        gpu = device.type == 'cuda'
        map_location = None if gpu else 'cpu'
        configs.update({'device': device, 'gpu': gpu, 'map_location': map_location})
        # Hyper-parameters of the model selected by model_num
        model_num = configs['model_num']
        for k, v in configs['model'][model_num].items():
            configs[k] = v
        del configs['model']
        return cls(configs)
if __name__ == '__main__':
    # Script entry point: build the dataset first, then the configured runner,
    # and start training (same order as before, since both steps may log).
    dataset = Data()
    runner = Run.read_configs()
    runner.train(dataset)