Duplicate issue
I have searched the existing issues
Error description
I tried to run GPT model inference with PaddleNLP, using the model gpt-cpm-small-cn-distill. The script is benchmark_gpt.py (reproduced in full under "Steps to reproduce & code" below). Running it produces the following output:
/home/wzy/.local/lib/python3.8/site-packages/_distutils_hack/__init__.py:26: UserWarning: Setuptools is replacing distutils.
warnings.warn("Setuptools is replacing distutils.")
Test GPT Model gpt-cpm-small ........
Current Paddle device : gpu:0
[2024-09-20 01:37:50,268][ INFO] - Loading weights file from cache at /home/wzy/.paddlenlp/models/gpt-cpm-large-cn/model_state.pdparams
[2024-09-20 01:38:03,948][ INFO] - Loaded weights file from disk, setting weights to model.
W0920 01:38:03.950428 12651 gpu_resources.cc:119] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.2, Runtime API Version: 11.2
W0920 01:38:03.953485 12651 gpu_resources.cc:164] device: 0, cuDNN Version: 8.1.
[2024-09-20 01:38:30,694][ INFO] - All model checkpoint weights were used when initializing GPTForCausalLM.
[2024-09-20 01:38:30,695][ WARNING] - Some weights of GPTForCausalLM were not initialized from the model checkpoint at gpt-cpm-large-cn and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[2024-09-20 01:38:30,995][ INFO] - Generation config file not found, using a generation config created from the model config.
Traceback (most recent call last):
  File "benchmark_lstm.py", line 297, in <module>
    model = TestGPT()
  File "benchmark_lstm.py", line 241, in __init__
    self.tokenizer = GPTTokenizer.from_pretrained(model_name)
  File "/home/wzy/.local/lib/python3.8/site-packages/paddlenlp/transformers/tokenizer_utils.py", line 709, in from_pretrained
    tokenizer, tokenizer_config_file_dir = super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
  File "/home/wzy/.local/lib/python3.8/site-packages/paddlenlp/transformers/tokenizer_utils_base.py", line 1515, in from_pretrained
    assert len(tokenizer_config_file_dir_list) > 0, "All tokenizer files should be in the same directory."
AssertionError: All tokenizer files should be in the same directory.
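The failure occurs while loading the tokenizer, before the model is ever run, so it can presumably be reproduced without the benchmark harness at all. A minimal sketch, assuming the same PaddleNLP installation:

from paddlenlp.transformers import GPTTokenizer

# This is the call that raises the AssertionError shown in the traceback above.
tokenizer = GPTTokenizer.from_pretrained("gpt-cpm-small-cn-distill")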
Steps to reproduce & code
import os
import time
import paddle
from paddle import nn
from paddlenlp.transformers import BertModel, BertTokenizer, ErnieModel, ErnieTokenizer
from paddlenlp.transformers import GPTTokenizer, GPTLMHeadModel
from paddlenlp.transformers import Llama3Tokenizer, LlamaModel
import numpy as np
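# NOTE (added for completeness, not part of the original report): the script
# below calls set_flags() and to_cinn_net(), but their definitions were not
# included in the issue. The sketch here is an assumption based on Paddle's
# documented CINN workflow (BuildStrategy.build_cinn_pass + paddle.jit.to_static);
# exact flag names may differ between Paddle builds.
def set_flags(use_cinn):
    # Enable primitive-operator decomposition when running with CINN.
    paddle.set_flags({"FLAGS_prim_all": bool(use_cinn)})


def to_cinn_net(net, **kwargs):
    # Convert a dygraph Layer into a static graph compiled by the CINN backend.
    build_strategy = paddle.static.BuildStrategy()
    build_strategy.build_cinn_pass = True
    return paddle.jit.to_static(
        net, build_strategy=build_strategy, full_graph=True, **kwargs
    )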
def benchmark(net, input_ids, token_type_ids, repeat=5, warmup=3):
    # warm up
    for _ in range(warmup):
        net(input_ids, token_type_ids)
    paddle.device.synchronize()
    # time
    t = []
    for _ in range(repeat):
        t1 = time.time()
        net(input_ids, token_type_ids)
        paddle.device.synchronize()
        t2 = time.time()
        t.append((t2 - t1) * 1000)
    print("--[benchmark] Run for %d times, the average latency is: %f ms" % (repeat, np.mean(t)))
class TestBase:
    def __init__(self):
        device_info = paddle.get_device()
        print("Current Paddle device : %s" % (device_info))
        self.net = None
        self.input = None
        self.cinn_net = None

    def to_eval(self, use_cinn):
        # Switch flags and (lazily) build the CINN-compiled copy of the network.
        set_flags(use_cinn)
        if use_cinn:
            if not self.cinn_net:
                self.cinn_net = to_cinn_net(self.net)
            net = self.cinn_net
        else:
            net = self.net
        net.eval()
        return net

    def eval(self, use_cinn):
        net = self.to_eval(use_cinn)
        out = net(self.input)
        return out

    def check_cinn_output(self):
        # Compare CINN results against the plain dygraph results.
        pd_out = self.eval(use_cinn=False)
        cinn_out = self.eval(use_cinn=True)
        np.testing.assert_allclose(
            cinn_out.numpy(), pd_out.numpy(), atol=1e-3, rtol=1e-3
        )
        print("--[check_cinn_output] cinn result right.")

    def benchmark(self, use_cinn):
        print("--[benchmark] benchmark %s" % ("cinn" if use_cinn else "nocinn"))
        net = self.to_eval(use_cinn)
        benchmark(net, self.input)
class TestGPT(TestBase):
    def __init__(self, batch_size=1):
        super().__init__()
        max_seq_length = 1024  # maximum sequence length
        model_name = 'gpt-cpm-small-cn-distill'
        self.net = GPTLMHeadModel.from_pretrained(model_name)
        self.tokenizer = GPTTokenizer.from_pretrained(model_name)
        # Build input data by tokenizing a sample sentence.
        encoded_text = self.tokenizer(text="请输入测试样例")
        self.input_ids = paddle.to_tensor([encoded_text['input_ids']])
        self.token_type_ids = paddle.to_tensor([encoded_text['token_type_ids']])

    def to_eval(self, use_cinn):
        set_flags(use_cinn)
        if use_cinn:
            if not self.cinn_net:
                self.cinn_net = to_cinn_net(self.net)
            net = self.cinn_net
        else:
            net = self.net
        net.eval()
        return net

    def eval(self, use_cinn):
        net = self.to_eval(use_cinn)
        out = net(self.input_ids, self.token_type_ids)
        return out

    def check_cinn_output(self):
        pd_out = self.eval(use_cinn=False)
        cinn_out = self.eval(use_cinn=True)
        np.testing.assert_allclose(
            cinn_out.last_hidden_state.numpy(), pd_out.last_hidden_state.numpy(), atol=1e-3, rtol=1e-3
        )
        print("--[check_cinn_output] cinn result right.")

    def benchmark(self, use_cinn):
        print("--[benchmark] benchmark %s" % ("cinn" if use_cinn else "nocinn"))
        net = self.to_eval(use_cinn)
        benchmark(net, self.input_ids, self.token_type_ids)


if __name__ == "__main__":
    print("Test GPT Model gpt-cpm-small ........")
    model = TestGPT()
    model.benchmark(use_cinn=False)
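As a side note (not part of the original report): the assertion complains that the resolved tokenizer files are not all in one directory, so listing the local PaddleNLP cache for this model may show which files were actually downloaded and where. A hedged diagnostic sketch, assuming the cache layout visible in the log above (~/.paddlenlp/models/<model_name>/):

import os

# Hypothetical diagnostic: inspect the cached tokenizer/model files for this model.
cache_dir = os.path.expanduser("~/.paddlenlp/models/gpt-cpm-small-cn-distill")
if os.path.isdir(cache_dir):
    for name in sorted(os.listdir(cache_dir)):
        print(os.path.join(cache_dir, name))
else:
    print("No local cache found at", cache_dir)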