Closed
Description
I am using the new Qwen-14B-Chat, and it raises the following error:
INFO 09-25 16:20:37 llm_engine.py:72] Initializing an LLM engine with config: model='/data/pretrained_models/Qwen-14B-Chat', tokenizer='/data/pretrained_models/Qwen-14B-Chat', tokenizer_mode=auto, revision=None, trust_remote_code=True, dtype=torch.bfloat16, download_dir=None, load_format=auto, tensor_parallel_size=2, quantization=None, seed=0)
WARNING 09-25 16:20:37 tokenizer.py:64] Using a slow tokenizer. This might cause a significant slowdown. Consider using a fast tokenizer instead.
[2023-09-25 16:20:42 +0800] [34243] [ERROR] Exception in worker process
Traceback (most recent call last):
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/arbiter.py", line 609, in spawn_worker
worker.init_process()
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/uvicorn/workers.py", line 66, in init_process
super(UvicornWorker, self).init_process()
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/workers/base.py", line 134, in init_process
self.load_wsgi()
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/workers/base.py", line 146, in load_wsgi
self.wsgi = self.app.wsgi()
^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/app/base.py", line 67, in wsgi
self.callable = self.load()
^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/app/wsgiapp.py", line 58, in load
return self.load_wsgiapp()
^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/app/wsgiapp.py", line 48, in load_wsgiapp
return util.import_app(self.app_uri)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/util.py", line 371, in import_app
mod = importlib.import_module(module)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<frozen importlib._bootstrap>", line 1204, in _gcd_import
File "<frozen importlib._bootstrap>", line 1176, in _find_and_load
File "<frozen importlib._bootstrap>", line 1147, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 940, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "/data/lijinghui/online_chat/chat_fastapi_vllm.py", line 35, in <module>
model = LLM(
^^^^
File "/data/lijinghui/vllm/vllm/entrypoints/llm.py", line 89, in __init__
self.llm_engine = LLMEngine.from_engine_args(engine_args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/engine/llm_engine.py", line 226, in from_engine_args
engine = cls(*engine_configs,
^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/engine/llm_engine.py", line 103, in __init__
self._init_workers_ray(placement_group)
File "/data/lijinghui/vllm/vllm/engine/llm_engine.py", line 176, in _init_workers_ray
self._run_workers(
File "/data/lijinghui/vllm/vllm/engine/llm_engine.py", line 692, in _run_workers
all_outputs = ray.get(all_outputs)
^^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/ray/_private/worker.py", line 2524, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): ray::RayWorker.execute_method() (pid=36439, ip=192.168.8.57, actor_id=f5052b1af599ae3ab380849c01000000, repr=<vllm.engine.ray_utils.RayWorker object at 0x7fc4902c1110>)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/engine/ray_utils.py", line 32, in execute_method
return executor(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/worker/worker.py", line 67, in init_model
self.model = get_model(self.model_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/model_executor/model_loader.py", line 100, in get_model
model.load_weights(model_config.model, model_config.download_dir,
File "/data/lijinghui/vllm/vllm/model_executor/models/qwen.py", line 290, in load_weights
loaded_weight = loaded_weight.view(3, total_num_heads,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: shape '[3, 32, 128]' is invalid for input of size 15360
2023-09-25 16:20:42,630 ERROR worker.py:405 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::RayWorker.execute_method() (pid=36438, ip=192.168.8.57, actor_id=f5e010bb1e73c9731b73bffb01000000, repr=<vllm.engine.ray_utils.RayWorker object at 0x7f96efcf5910>)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/engine/ray_utils.py", line 32, in execute_method
return executor(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/worker/worker.py", line 67, in init_model
self.model = get_model(self.model_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/model_executor/model_loader.py", line 100, in get_model
model.load_weights(model_config.model, model_config.download_dir,
File "/data/lijinghui/vllm/vllm/model_executor/models/qwen.py", line 290, in load_weights
loaded_weight = loaded_weight.view(3, total_num_heads,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: shape '[3, 32, 128]' is invalid for input of size 15360
Metadata
Assignees
Labels
No labels