Description
The following is the error output, raised when running inside a container:
```
Traceback (most recent call last):
  File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/entrypoints/openai/api_server.py", line 624, in <module>
    engine = AsyncLLMEngine.from_engine_args(engine_args)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/engine/async_llm_engine.py", line 232, in from_engine_args
    engine = cls(engine_args.worker_use_ray,
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/engine/async_llm_engine.py", line 55, in __init__
    self.engine = engine_class(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/engine/llm_engine.py", line 104, in __init__
    self._init_cache()
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/engine/llm_engine.py", line 182, in _init_cache
    num_blocks = self._run_workers(
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/engine/llm_engine.py", line 470, in _run_workers
    output = executor(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/worker/worker.py", line 108, in profile_num_available_blocks
    self.model(
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/models/baichuan.py", line 292, in forward
    hidden_states = self.model(input_ids, positions, kv_caches,
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/models/baichuan.py", line 260, in forward
    hidden_states = layer(
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/models/baichuan.py", line 210, in forward
    hidden_states = self.self_attn(
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/models/baichuan.py", line 169, in forward
    attn_output = self.attn(q, k, v, k_cache, v_cache, input_metadata,
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/layers/attention.py", line 202, in forward
    self.multi_query_kv_attention(
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/layers/attention.py", line 399, in multi_query_kv_attention
    out = xops.memory_efficient_attention_forward(
  File "/usr/local/lib/python3.8/dist-packages/xformers/ops/fmha/__init__.py", line 214, in memory_efficient_attention_forward
    return _memory_efficient_attention_forward(
  File "/usr/local/lib/python3.8/dist-packages/xformers/ops/fmha/__init__.py", line 304, in _memory_efficient_attention_forward
    inp.validate_inputs()
  File "/usr/local/lib/python3.8/dist-packages/xformers/ops/fmha/common.py", line 120, in validate_inputs
    raise ValueError(
ValueError: Invalid shape for attention bias: torch.Size([40, 10, 10]) (expected (1, 40, 10, 10))
  query.shape: torch.Size([1, 10, 40, 128])
  key.shape  : torch.Size([1, 10, 40, 128])
  value.shape: torch.Size([1, 10, 40, 128])
```
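
For context, here is a minimal sketch that reproduces the validation failure and shows the kind of shape fix xformers expects. It assumes the bias is the per-head tensor (e.g. ALiBi) built in multi_query_kv_attention without a batch dimension; the tensor names and the unsqueeze fix are illustrative assumptions, not the actual vLLM patch:

```python
import torch
import xformers.ops as xops

# Shapes taken from the traceback: batch=1, seq_len=10, heads=40, head_dim=128.
# Requires a CUDA build of xformers.
q = torch.randn(1, 10, 40, 128, device="cuda", dtype=torch.float16)
k = torch.randn_like(q)
v = torch.randn_like(q)

# A per-head bias built without a batch dimension: [40, 10, 10].
# Passing it as-is raises the ValueError above, because validate_inputs()
# checks a tensor bias against [batch, heads, q_len, kv_len].
bias = torch.randn(40, 10, 10, device="cuda", dtype=torch.float16)

# Hypothetical fix for illustration: prepend the missing batch dimension.
bias = bias.unsqueeze(0)  # -> [1, 40, 10, 10]

out = xops.memory_efficient_attention_forward(q, k, v, attn_bias=bias)
print(out.shape)  # torch.Size([1, 10, 40, 128])
```

If the batch size were greater than 1, the unsqueezed bias would presumably also need to be expanded along the batch dimension to match the query.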