sanity_check.py
import torch

from bert import BertModel
from utils import get_extended_attention_mask

# Reference outputs for each module, used to verify the implementation below
sanity_data = torch.load("./sanity_check.data")
# Unit test inputs for BertModel
# The ids below are the bert-base-uncased tokenization of:
#   text_batch = ["hello world", "hello neural network for NLP"]
# (101 = [CLS], 102 = [SEP]; the shorter sentence is right-padded with 0 = [PAD],
# and att_mask marks real tokens with 1, padding with 0)
sent_ids = torch.tensor([[101, 7592, 2088, 102, 0, 0, 0, 0],
[101, 7592, 15756, 2897, 2005, 17953, 2361, 102]])
att_mask = torch.tensor([[1, 1, 1, 1, 0, 0, 0, 0],[1, 1, 1, 1, 1, 1, 1, 1]])
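# One way to reproduce these tensors (a sketch, assuming the HuggingFace
# `transformers` package; kept commented out since this check does not need it):
#   from transformers import BertTokenizer
#   tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
#   enc = tokenizer(["hello world", "hello neural network for NLP"],
#                   padding=True, return_tensors='pt')
#   sent_ids, att_mask = enc['input_ids'], enc['attention_mask']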
# load our model
bert = BertModel.from_pretrained('bert-base-uncased')
bertlayer = bert.bert_layers[0]
bert_attn = bertlayer.self_attention
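# eval() disables dropout so outputs are deterministic and comparable to the reference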
bert.eval()
bertlayer.eval()
bert_attn.eval()
# 1. BertModel.embed function
embed_outputs = bert.embed(input_ids=sent_ids)
assert torch.allclose(embed_outputs, sanity_data['embed_outputs'], rtol=1e-3, atol=1e-04)
print("Your BertModel.embed() implementation is correct!")
# 2. BertSelfAttention
hidden_states = embed_outputs
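# get_extended_attention_mask is expected to broadcast att_mask to
# (batch, 1, 1, seq_len) and, following the usual HuggingFace convention, map
# padded positions to a large negative value so they get ~zero weight after softmax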
attention_mask: torch.Tensor = get_extended_attention_mask(att_mask, bert.dtype)
attn_outputs = bert_attn(hidden_states, attention_mask)
assert torch.allclose(attn_outputs, sanity_data['attn_outputs'], rtol=1e-3, atol=1e-04)
print("Your BertSelfAttention implementation is correct!")
# 3. BertLayer
layer_outputs = bertlayer(hidden_states, attention_mask)
assert torch.allclose(layer_outputs, sanity_data['layer_outputs'], rtol=1e-3, atol=1e-04)
print("Your BertLayer implementation is correct!")
# 4. BertModel
outputs = bert(sent_ids, att_mask)
for k in ['last_hidden_state', 'pooler_output']:
    assert torch.allclose(outputs[k], sanity_data[k], rtol=1e-3, atol=1e-4)
print("Your BERT implementation is correct!")