convert_bert_from_uer_to_original_tf.py
import sys
import os
import argparse
import collections

import numpy as np
import tensorflow as tf
import torch
import tensorflow.keras.backend as K

# Make the repository root importable so that the scripts package resolves.
uer_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, uer_dir)

# The imported name keeps the spelling used by the companion script.
from scripts.convert_bert_from_original_tf_to_uer import tensors_to_transopse
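
# Example invocation (paths are illustrative):
#   python3 scripts/convert_bert_from_uer_to_original_tf.py \
#       --input_model_path models/bert_model.bin \
#       --output_model_path models/bert_model.ckpt \
#       --layers_num 12 --type bert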


def assign_tf_var(tensor: np.ndarray, name: str):
    # Create a TF variable matching the tensor's dtype/shape and copy the
    # numpy values into it through the Keras backend.
    tf_var = tf.get_variable(dtype=tensor.dtype, shape=tensor.shape, name=name)
    tf.keras.backend.set_value(tf_var, tensor)
    return tf_var
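
# Note: tf.get_variable, tf.Session, and tf.train.Saver are TensorFlow 1.x
# APIs; under TensorFlow 2.x this script would need the compat layer, e.g.
# "import tensorflow.compat.v1 as tf" plus tf.disable_v2_behavior().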

def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--layers_num", type=int, default=12,
                        help="Number of transformer encoder layers.")
    parser.add_argument("--input_model_path", type=str, default="models/input_model.bin",
                        help="Path to the input UER-py PyTorch model.")
    parser.add_argument("--output_model_path", type=str, default="models/output_model.ckpt",
                        help="Path to the output TensorFlow checkpoint.")
    parser.add_argument("--type", choices=["bert", "mlm"], default="bert",
                        help="The training target of the pretraining model.")
    args = parser.parse_args()

    input_model = torch.load(args.input_model_path, map_location="cpu")

    session = tf.Session()
    K.set_session(session)

    output_model = collections.OrderedDict()
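
    # Map each UER-py parameter name to the variable name used by the original
    # Google BERT TensorFlow checkpoint; see the companion
    # convert_bert_from_original_tf_to_uer.py for the reverse mapping.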
output_model["bert/embeddings/word_embeddings"] = input_model["embedding.word.embedding.weight"]
output_model["bert/embeddings/position_embeddings"] = input_model["embedding.pos.embedding.weight"]
output_model["bert/embeddings/token_type_embeddings"] = input_model["embedding.seg.embedding.weight"][1:, :]
output_model["bert/embeddings/LayerNorm/gamma"] = input_model["embedding.layer_norm.gamma"]
output_model["bert/embeddings/LayerNorm/beta"] = input_model["embedding.layer_norm.beta"]
    for i in range(args.layers_num):
        output_model["bert/encoder/layer_" + str(i) + "/attention/self/query/kernel"] = \
            input_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.0.weight"]
        output_model["bert/encoder/layer_" + str(i) + "/attention/self/query/bias"] = \
            input_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.0.bias"]
        output_model["bert/encoder/layer_" + str(i) + "/attention/self/key/kernel"] = \
            input_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.1.weight"]
        output_model["bert/encoder/layer_" + str(i) + "/attention/self/key/bias"] = \
            input_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.1.bias"]
        output_model["bert/encoder/layer_" + str(i) + "/attention/self/value/kernel"] = \
            input_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.2.weight"]
        output_model["bert/encoder/layer_" + str(i) + "/attention/self/value/bias"] = \
            input_model["encoder.transformer." + str(i) + ".self_attn.linear_layers.2.bias"]
        output_model["bert/encoder/layer_" + str(i) + "/attention/output/dense/kernel"] = \
            input_model["encoder.transformer." + str(i) + ".self_attn.final_linear.weight"]
        output_model["bert/encoder/layer_" + str(i) + "/attention/output/dense/bias"] = \
            input_model["encoder.transformer." + str(i) + ".self_attn.final_linear.bias"]
        output_model["bert/encoder/layer_" + str(i) + "/attention/output/LayerNorm/gamma"] = \
            input_model["encoder.transformer." + str(i) + ".layer_norm_1.gamma"]
        output_model["bert/encoder/layer_" + str(i) + "/attention/output/LayerNorm/beta"] = \
            input_model["encoder.transformer." + str(i) + ".layer_norm_1.beta"]
        output_model["bert/encoder/layer_" + str(i) + "/intermediate/dense/kernel"] = \
            input_model["encoder.transformer." + str(i) + ".feed_forward.linear_1.weight"]
        output_model["bert/encoder/layer_" + str(i) + "/intermediate/dense/bias"] = \
            input_model["encoder.transformer." + str(i) + ".feed_forward.linear_1.bias"]
        output_model["bert/encoder/layer_" + str(i) + "/output/dense/kernel"] = \
            input_model["encoder.transformer." + str(i) + ".feed_forward.linear_2.weight"]
        output_model["bert/encoder/layer_" + str(i) + "/output/dense/bias"] = \
            input_model["encoder.transformer." + str(i) + ".feed_forward.linear_2.bias"]
        output_model["bert/encoder/layer_" + str(i) + "/output/LayerNorm/gamma"] = \
            input_model["encoder.transformer." + str(i) + ".layer_norm_2.gamma"]
        output_model["bert/encoder/layer_" + str(i) + "/output/LayerNorm/beta"] = \
            input_model["encoder.transformer." + str(i) + ".layer_norm_2.beta"]
    if args.type == "bert":
        # The bert target additionally contains the pooler and the
        # next-sentence-prediction (seq_relationship) classifier.
        output_model["bert/pooler/dense/kernel"] = input_model["target.sp.linear_1.weight"]
        output_model["bert/pooler/dense/bias"] = input_model["target.sp.linear_1.bias"]
        output_model["cls/seq_relationship/output_weights"] = input_model["target.sp.linear_2.weight"]
        output_model["cls/seq_relationship/output_bias"] = input_model["target.sp.linear_2.bias"]

    # Both the bert and mlm targets include the masked-language-model head.
    output_model["cls/predictions/transform/dense/kernel"] = input_model["target.mlm.linear_1.weight"]
    output_model["cls/predictions/transform/dense/bias"] = input_model["target.mlm.linear_1.bias"]
    output_model["cls/predictions/transform/LayerNorm/gamma"] = input_model["target.mlm.layer_norm.gamma"]
    output_model["cls/predictions/transform/LayerNorm/beta"] = input_model["target.mlm.layer_norm.beta"]
    output_model["cls/predictions/output_bias"] = input_model["target.mlm.linear_2.bias"]
    tf_vars = []
    for k, v in output_model.items():
        tf_name = k
        torch_tensor = v.cpu().numpy()
        # PyTorch nn.Linear stores weights as (out_features, in_features);
        # TF dense kernels are (in_features, out_features), hence the transpose.
        if any(x in k for x in tensors_to_transopse):
            torch_tensor = torch_tensor.T
        tf_tensor = assign_tf_var(tensor=torch_tensor, name=tf_name)
        tf_vars.append(tf_tensor)
        print("{0}{1}initialized".format(tf_name, " " * (60 - len(tf_name))))

    saver = tf.train.Saver(tf_vars)
    saver.save(session, args.output_model_path)
    K.clear_session()
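
    # Optional sanity check (sketch, not part of the original script):
    #   reader = tf.train.load_checkpoint(args.output_model_path)
    #   print(sorted(reader.get_variable_to_shape_map().keys()))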


if __name__ == "__main__":
    main()