Open
Description
While trying out `python inference/bot.py --retrieval --model togethercomputer/GPT-NeoXT-Chat-Base-20B`,
I got this error on an A100 GPU:
File "inference/bot.py", line 185, in <module>
main()
File "inference/bot.py", line 173, in main
OpenChatKitShell(
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/cmd.py", line 138, in cmdloop
stop = self.onecmd(line)
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/cmd.py", line 217, in onecmd
return func(arg)
File "inference/bot.py", line 87, in do_say
output = self._model.do_inference(
File "inference/bot.py", line 32, in do_inference
outputs = self._model.generate(
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/transformers/generation_utils.py", line 1326, in generate
return self.sample(
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/transformers/generation_utils.py", line 1944, in sample
outputs = self(
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 619, in forward
outputs = self.gpt_neox(
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 511, in forward
outputs = layer(
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 319, in forward
attention_layer_outputs = self.attention(
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 115, in forward
qkv = self.query_key_value(hidden_states)
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/admin/home/anaconda3/envs/openkit/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
Metadata
Metadata
Assignees
Labels
No labels