Open
Description
Hi, we're testing m2-bert-80M-32k-retrieval,
and while running inference we intermittently (seemingly at random) get the following error:
outputs = self.model(**input_dict)
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ezorita/.cache/huggingface/modules/transformers_modules/togethercomputer/m2-bert-80M-32k-retrieval/a2ccdc5b5661a282c77545e586a019f387ab7a48/bert_layers.py", line 956, in forward
outputs = self.bert(
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ezorita/.cache/huggingface/modules/transformers_modules/togethercomputer/m2-bert-80M-32k-retrieval/a2ccdc5b5661a282c77545e586a019f387ab7a48/bert_layers.py", line 528, in forward
encoder_outputs = self.encoder(
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ezorita/.cache/huggingface/modules/transformers_modules/togethercomputer/m2-bert-80M-32k-retrieval/a2ccdc5b5661a282c77545e586a019f387ab7a48/bert_layers.py", line 371, in forward
hidden_states = layer_module(hidden_states,
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ezorita/.cache/huggingface/modules/transformers_modules/togethercomputer/m2-bert-80M-32k-retrieval/a2ccdc5b5661a282c77545e586a019f387ab7a48/bert_layers.py", line 280, in forward
attention_output = self.attention(hidden_states)
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ezorita/.cache/huggingface/modules/transformers_modules/togethercomputer/m2-bert-80M-32k-retrieval/a2ccdc5b5661a282c77545e586a019f387ab7a48/monarch_mixer_sequence_mixer.py", line 129, in forward
y = self.filter_fn(v, L, k_fwd=k, k_rev=k_rev, bias= self.filter_fn.bias[None, :, None])
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ezorita/.cache/pypoetry/virtualenvs/ml-benchmarks-NBkZU-eG-py3.9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ezorita/.cache/huggingface/modules/transformers_modules/togethercomputer/m2-bert-80M-32k-retrieval/a2ccdc5b5661a282c77545e586a019f387ab7a48/hyena_utils.py", line 251, in forward
y = fftconv_ref(
File "/home/ezorita/.cache/huggingface/modules/transformers_modules/togethercomputer/m2-bert-80M-32k-retrieval/a2ccdc5b5661a282c77545e586a019f387ab7a48/hyena_utils.py", line 42, in fftconv_ref
u_f = torch.fft.rfft(u.to(dtype=k.dtype), n=fft_size)
RuntimeError: cuFFT error: CUFFT_INTERNAL_ERROR
Any ideas what could cause this CUFFT_INTERNAL_ERROR, or how to work around it?
Metadata
Metadata
Assignees
Labels
No labels