Closed
Description
I am using the new Qwen-14B-Chat, and it raises the following error:
INFO 09-25 16:20:37 llm_engine.py:72] Initializing an LLM engine with config: model='/data/pretrained_models/Qwen-14B-Chat', tokenizer='/data/pretrained_models/Qwen-14B-Chat', tokenizer_mode=auto, revision=None, trust_remote_code=True, dtype=torch.bfloat16, download_dir=None, load_format=auto, tensor_parallel_size=2, quantization=None, seed=0)
WARNING 09-25 16:20:37 tokenizer.py:64] Using a slow tokenizer. This might cause a significant slowdown. Consider using a fast tokenizer instead.
[2023-09-25 16:20:42 +0800] [34243] [ERROR] Exception in worker process
Traceback (most recent call last):
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/arbiter.py", line 609, in spawn_worker
worker.init_process()
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/uvicorn/workers.py", line 66, in init_process
super(UvicornWorker, self).init_process()
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/workers/base.py", line 134, in init_process
self.load_wsgi()
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/workers/base.py", line 146, in load_wsgi
self.wsgi = self.app.wsgi()
^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/app/base.py", line 67, in wsgi
self.callable = self.load()
^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/app/wsgiapp.py", line 58, in load
return self.load_wsgiapp()
^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/app/wsgiapp.py", line 48, in load_wsgiapp
return util.import_app(self.app_uri)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/gunicorn/util.py", line 371, in import_app
mod = importlib.import_module(module)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<frozen importlib._bootstrap>", line 1204, in _gcd_import
File "<frozen importlib._bootstrap>", line 1176, in _find_and_load
File "<frozen importlib._bootstrap>", line 1147, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 940, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "/data/lijinghui/online_chat/chat_fastapi_vllm.py", line 35, in <module>
model = LLM(
^^^^
File "/data/lijinghui/vllm/vllm/entrypoints/llm.py", line 89, in __init__
self.llm_engine = LLMEngine.from_engine_args(engine_args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/engine/llm_engine.py", line 226, in from_engine_args
engine = cls(*engine_configs,
^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/engine/llm_engine.py", line 103, in __init__
self._init_workers_ray(placement_group)
File "/data/lijinghui/vllm/vllm/engine/llm_engine.py", line 176, in _init_workers_ray
self._run_workers(
File "/data/lijinghui/vllm/vllm/engine/llm_engine.py", line 692, in _run_workers
all_outputs = ray.get(all_outputs)
^^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/miniconda3/envs/ljh_py311/lib/python3.11/site-packages/ray/_private/worker.py", line 2524, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): ray::RayWorker.execute_method() (pid=36439, ip=192.168.8.57, actor_id=f5052b1af599ae3ab380849c01000000, repr=<vllm.engine.ray_utils.RayWorker object at 0x7fc4902c1110>)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/engine/ray_utils.py", line 32, in execute_method
return executor(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/worker/worker.py", line 67, in init_model
self.model = get_model(self.model_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/model_executor/model_loader.py", line 100, in get_model
model.load_weights(model_config.model, model_config.download_dir,
File "/data/lijinghui/vllm/vllm/model_executor/models/qwen.py", line 290, in load_weights
loaded_weight = loaded_weight.view(3, total_num_heads,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: shape '[3, 32, 128]' is invalid for input of size 15360
2023-09-25 16:20:42,630 ERROR worker.py:405 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::RayWorker.execute_method() (pid=36438, ip=192.168.8.57, actor_id=f5e010bb1e73c9731b73bffb01000000, repr=<vllm.engine.ray_utils.RayWorker object at 0x7f96efcf5910>)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/engine/ray_utils.py", line 32, in execute_method
return executor(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/worker/worker.py", line 67, in init_model
self.model = get_model(self.model_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/lijinghui/vllm/vllm/model_executor/model_loader.py", line 100, in get_model
model.load_weights(model_config.model, model_config.download_dir,
File "/data/lijinghui/vllm/vllm/model_executor/models/qwen.py", line 290, in load_weights
loaded_weight = loaded_weight.view(3, total_num_heads,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: shape '[3, 32, 128]' is invalid for input of size 15360
Metadata
Assignees
Labels
No labels