|
import os

import pytest
# torch and torch_musa stay ahead of the model libraries, as in the original ordering.
import torch
import torch_musa
from bert4torch.models import build_transformer_model
from bert4torch.tokenizers import Tokenizer
from transformers import BertConfig, BertTokenizer, BertModel
| 7 | + |
| 8 | + |
# Target device for both models and the tokenized inputs.
# The availability check must query the MUSA backend (registered on ``torch`` by
# ``import torch_musa``), not CUDA: asking CUDA and then selecting 'musa' picks
# the wrong device on either kind of host. The ``hasattr`` guard keeps the
# fallback to CPU working if the backend was never registered.
device = (
    'musa'
    if hasattr(torch, 'musa') and torch.musa.is_available()
    else 'cpu'
)
| 10 | + |
def get_bert4torch_model(model_dir):
    """Build the bert4torch model from ``model_dir`` and move it to ``device``.

    The config is read from ``bert4torch_config.json`` when that file exists
    in ``model_dir``; otherwise ``config.json`` is used. Weights are read from
    ``pytorch_model.bin`` in the same directory.

    Args:
        model_dir: Directory holding the config file and the checkpoint.

    Returns:
        The model returned by ``build_transformer_model``, moved to ``device``.
    """
    preferred_config = model_dir + "/bert4torch_config.json"
    if os.path.exists(preferred_config):
        config_path = preferred_config
    else:
        # Fall back to the generic config name.
        config_path = model_dir + "/config.json"
    weights_path = model_dir + '/pytorch_model.bin'

    # Build the model and load the weights.
    bert = build_transformer_model(config_path, weights_path)
    bert = bert.to(device)
    return bert
| 19 | + |
| 20 | + |
def get_hf_model(model_dir):
    """Load the Hugging Face BERT model and its tokenizer from ``model_dir``.

    Args:
        model_dir: Directory passed to ``from_pretrained`` for both objects.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been moved to ``device``.
    """
    # The tokenizer is loaded first, then the model, as in the original order.
    hf_tokenizer = BertTokenizer.from_pretrained(model_dir)
    hf_model = BertModel.from_pretrained(model_dir).to(device)
    return hf_model, hf_tokenizer
| 25 | + |
| 26 | + |
@pytest.mark.parametrize("model_dir", ["E:/data/pretrain_ckpt/bert/google@bert-base-chinese",
                                       "E:/data/pretrain_ckpt/bert/bert-base-multilingual-cased",
                                       "E:/data/pretrain_ckpt/bert/hfl@macbert-base",
                                       "E:/data/pretrain_ckpt/bert/hfl@chinese-bert-wwm-ext"])
@torch.inference_mode()
def test_bert(model_dir):
    """Check that bert4torch and Hugging Face BERT outputs agree for one checkpoint.

    Both models are loaded from ``model_dir``, switched to eval mode, and fed
    the same tokenized sentence. The bert4torch output is compared element-wise
    with the Hugging Face ``last_hidden_state``.

    Args:
        model_dir: Checkpoint directory used for both model loaders.

    Raises:
        AssertionError: If the largest absolute difference is not below 1e-4.
    """
    model = get_bert4torch_model(model_dir)
    model_hf, tokenizer = get_hf_model(model_dir)

    model.eval()
    model_hf.eval()

    inputs = tokenizer('语言模型', padding=True, return_tensors='pt').to(device)
    sequence_output = model(**inputs)
    sequence_output_hf = model_hf(**inputs).last_hidden_state

    # Compute the element-wise gap once and reuse it for the report and the check.
    abs_diff = (sequence_output - sequence_output_hf).abs()
    print(f"Output mean diff: {abs_diff.mean().item()}")

    max_diff = abs_diff.max().item()
    assert max_diff < 1e-4, f"max abs diff {max_diff} is not below 1e-4 for {model_dir}"
| 45 | + |
| 46 | + |
# Script entry point: run the comparison once against a local checkpoint directory.
if __name__ == "__main__":
    test_bert("/data/bert-base-chinese/")
0 commit comments