Your current environment
I'm using two Tesla T4 GPUs.
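The issue template normally asks for the output of vLLM's collect_env script; a minimal way to grab it from a notebook is sketched below (the raw URL is an assumption — the script lives at the repo root today but may move between releases):

# Fetch and run vLLM's environment-collection script.
# NOTE: the URL below is an assumption about where the script currently lives.
import urllib.request
import runpy

url = "https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py"
urllib.request.urlretrieve(url, "collect_env.py")
runpy.run_path("collect_env.py", run_name="__main__")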
from vllm import LLM
from vllm.sampling_params import SamplingParams
import torch

# Define the model and sampling parameters
MODEL_NAME = "Qwen/Qwen2.5-VL-7B-Instruct"
sampling_params = SamplingParams(max_tokens=300, temperature=0.01, top_p=0.001)

# Define context length and number of devices
context_length = 3000
num_device = 2

# Initialize the LLM with the allowed local media path
llm = LLM(
    model=MODEL_NAME,
    tokenizer=MODEL_NAME,
    speculative_max_model_len=context_length,
    max_seq_len_to_capture=context_length,
    max_model_len=context_length,
    tensor_parallel_size=num_device,
    trust_remote_code=True,
    # worker_use_ray=num_device,
    dtype=torch.float16,
    enable_chunked_prefill=True,
    gpu_memory_utilization=0.90,
    enforce_eager=True,
    max_num_batched_tokens=context_length,
    allowed_local_media_path="/kaggle/working/",  # allow loading local images via file:// URIs
    # quantization="fp8",
    # enable_prefix_caching=True,
    mm_processor_kwargs={"max_dynamic_patch": 1},
)
# Define the prompt and image path
prompt = "Describe this image in one sentence."
image_path = "file:///kaggle/working/dubu.png"  # local path converted to a file:// URI
# image_path = "file:///kaggle/working/image.png"

# Create the structured input format
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": image_path}},
        ],
    },
]
# Generate outputs
outputs = llm.chat(
    messages,
    sampling_params=sampling_params,
)

# Print the output
print(outputs[0].outputs[0].text)
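As a side note, the same request can be expressed without the file:// URI by loading the image yourself and passing it through multi_modal_data — a minimal sketch, assuming the stock Qwen2-VL chat-template placeholders (untested here, since the engine never finishes initializing):

from PIL import Image

image = Image.open("/kaggle/working/dubu.png")
outputs = llm.generate(
    {
        # Qwen2-VL style prompt with the image placeholder tokens.
        "prompt": (
            "<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>"
            "Describe this image in one sentence.<|im_end|>\n"
            "<|im_start|>assistant\n"
        ),
        "multi_modal_data": {"image": image},
    },
    sampling_params=sampling_params,
)
print(outputs[0].outputs[0].text)

Running the original script produces the following output: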
INFO 02-08 02:36:39 __init__.py:190] Automatically detected platform cuda.
config.json: 100% 1.37k/1.37k [00:00<00:00, 127kB/s]
preprocessor_config.json: 100% 353/353 [00:00<00:00, 38.2kB/s]
WARNING 02-08 02:36:40 config.py:2386] Casting torch.bfloat16 to torch.float16.
ERROR 02-08 02:36:50 registry.py:306] Error in inspecting model architecture 'Qwen2_5_VLForConditionalGeneration'
ERROR 02-08 02:36:50 registry.py:306] Traceback (most recent call last):
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py", line 507, in _run_in_subprocess
ERROR 02-08 02:36:50 registry.py:306] returned.check_returncode()
ERROR 02-08 02:36:50 registry.py:306] File "/usr/lib/python3.10/subprocess.py", line 457, in check_returncode
ERROR 02-08 02:36:50 registry.py:306] raise CalledProcessError(self.returncode, self.args, self.stdout,
ERROR 02-08 02:36:50 registry.py:306] subprocess.CalledProcessError: Command '['/usr/bin/python3', '-m', 'vllm.model_executor.models.registry']' returned non-zero exit status 1.
ERROR 02-08 02:36:50 registry.py:306]
ERROR 02-08 02:36:50 registry.py:306] The above exception was the direct cause of the following exception:
ERROR 02-08 02:36:50 registry.py:306]
ERROR 02-08 02:36:50 registry.py:306] Traceback (most recent call last):
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py", line 304, in _try_inspect_model_cls
ERROR 02-08 02:36:50 registry.py:306] return model.inspect_model_cls()
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py", line 275, in inspect_model_cls
ERROR 02-08 02:36:50 registry.py:306] return _run_in_subprocess(
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py", line 510, in _run_in_subprocess
ERROR 02-08 02:36:50 registry.py:306] raise RuntimeError(f"Error raised in subprocess:\n"
ERROR 02-08 02:36:50 registry.py:306] RuntimeError: Error raised in subprocess:
ERROR 02-08 02:36:50 registry.py:306] 2025-02-08 02:36:45.612461: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
ERROR 02-08 02:36:50 registry.py:306] 2025-02-08 02:36:45.633154: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
ERROR 02-08 02:36:50 registry.py:306] 2025-02-08 02:36:45.639437: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
ERROR 02-08 02:36:50 registry.py:306] /usr/lib/python3.10/runpy.py:126: RuntimeWarning: 'vllm.model_executor.models.registry' found in sys.modules after import of package 'vllm.model_executor.models', but prior to execution of 'vllm.model_executor.models.registry'; this may result in unpredictable behaviour
ERROR 02-08 02:36:50 registry.py:306] warn(RuntimeWarning(msg))
ERROR 02-08 02:36:50 registry.py:306] Traceback (most recent call last):
ERROR 02-08 02:36:50 registry.py:306] File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
ERROR 02-08 02:36:50 registry.py:306] return _run_code(code, main_globals, None,
ERROR 02-08 02:36:50 registry.py:306] File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
ERROR 02-08 02:36:50 registry.py:306] exec(code, run_globals)
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py", line 531, in <module>
ERROR 02-08 02:36:50 registry.py:306] _run()
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py", line 524, in _run
ERROR 02-08 02:36:50 registry.py:306] result = fn()
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py", line 276, in <lambda>
ERROR 02-08 02:36:50 registry.py:306] lambda: _ModelInfo.from_model_cls(self.load_model_cls()))
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py", line 279, in load_model_cls
ERROR 02-08 02:36:50 registry.py:306] mod = importlib.import_module(self.module_name)
ERROR 02-08 02:36:50 registry.py:306] File "/usr/lib/python3.10/importlib/__init__.py", line 126, in import_module
ERROR 02-08 02:36:50 registry.py:306] return _bootstrap._gcd_import(name[level:], package, level)
ERROR 02-08 02:36:50 registry.py:306] File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
ERROR 02-08 02:36:50 registry.py:306] File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
ERROR 02-08 02:36:50 registry.py:306] File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
ERROR 02-08 02:36:50 registry.py:306] File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
ERROR 02-08 02:36:50 registry.py:306] File "<frozen importlib._bootstrap_external>", line 883, in exec_module
ERROR 02-08 02:36:50 registry.py:306] File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/qwen2_5_vl.py", line 51, in <module>
ERROR 02-08 02:36:50 registry.py:306] from vllm.model_executor.layers.quantization.gptq import GPTQConfig
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/layers/quantization/gptq.py", line 15, in <module>
ERROR 02-08 02:36:50 registry.py:306] from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/layers/vocab_parallel_embedding.py", line 139, in <module>
ERROR 02-08 02:36:50 registry.py:306] def get_masked_input_and_mask(
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/torch/__init__.py", line 2424, in fn
ERROR 02-08 02:36:50 registry.py:306] return compile(
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/torch/__init__.py", line 2447, in compile
ERROR 02-08 02:36:50 registry.py:306] return torch._dynamo.optimize(
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/eval_frame.py", line 716, in optimize
ERROR 02-08 02:36:50 registry.py:306] return _optimize(rebuild_ctx, *args, **kwargs)
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/eval_frame.py", line 790, in _optimize
ERROR 02-08 02:36:50 registry.py:306] compiler_config=backend.get_compiler_config()
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/torch/__init__.py", line 2237, in get_compiler_config
ERROR 02-08 02:36:50 registry.py:306] from torch._inductor.compile_fx import get_patched_config_dict
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/torch/_inductor/compile_fx.py", line 49, in <module>
ERROR 02-08 02:36:50 registry.py:306] from torch._inductor.debug import save_args_for_compile_fx_inner
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/torch/_inductor/debug.py", line 26, in <module>
ERROR 02-08 02:36:50 registry.py:306] from . import config, ir # noqa: F811, this is needed
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/torch/_inductor/ir.py", line 77, in <module>
ERROR 02-08 02:36:50 registry.py:306] from .runtime.hints import ReductionHint
ERROR 02-08 02:36:50 registry.py:306] File "/usr/local/lib/python3.10/dist-packages/torch/_inductor/runtime/hints.py", line 36, in <module>
ERROR 02-08 02:36:50 registry.py:306] attr_desc_fields = {f.name for f in fields(AttrsDescriptor)}
ERROR 02-08 02:36:50 registry.py:306] File "/usr/lib/python3.10/dataclasses.py", line 1198, in fields
ERROR 02-08 02:36:50 registry.py:306] raise TypeError('must be called with a dataclass type or instance') from None
ERROR 02-08 02:36:50 registry.py:306] TypeError: must be called with a dataclass type or instance
ERROR 02-08 02:36:50 registry.py:306]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-28d98e80f319> in <cell line: 16>()
14
15 # Initialize the LLM with the allowed local media path
---> 16 llm = LLM(
17 model=MODEL_NAME,
18 tokenizer=MODEL_NAME,
/usr/local/lib/python3.10/dist-packages/vllm/utils.py in inner(*args, **kwargs)
1049 )
1050
-> 1051 return fn(*args, **kwargs)
1052
1053 return inner # type: ignore
/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/llm.py in __init__(self, model, tokenizer, tokenizer_mode, skip_tokenizer_init, trust_remote_code, allowed_local_media_path, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, cpu_offload_gb, enforce_eager, max_seq_len_to_capture, disable_custom_all_reduce, disable_async_output_proc, hf_overrides, mm_processor_kwargs, task, override_pooler_config, compilation_config, **kwargs)
240 # to avoid import order issues
241 self.engine_class = self.get_engine_class()
--> 242 self.llm_engine = self.engine_class.from_engine_args(
243 engine_args, usage_context=UsageContext.LLM_CLASS)
244
/usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py in from_engine_args(cls, engine_args, usage_context, stat_loggers)
479 """Creates an LLM engine from the engine arguments."""
480 # Create the engine configs.
--> 481 engine_config = engine_args.create_engine_config(usage_context)
482 executor_class = cls._get_executor_cls(engine_config)
483 # Create the LLM engine.
/usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py in create_engine_config(self, usage_context)
1073
1074 device_config = DeviceConfig(device=self.device)
-> 1075 model_config = self.create_model_config()
1076
1077 if (model_config.is_multimodal_model and not envs.VLLM_USE_V1
/usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py in create_model_config(self)
996
997 def create_model_config(self) -> ModelConfig:
--> 998 return ModelConfig(
999 model=self.model,
1000 task=self.task,
/usr/local/lib/python3.10/dist-packages/vllm/config.py in __init__(self, model, task, tokenizer, tokenizer_mode, trust_remote_code, dtype, seed, allowed_local_media_path, revision, code_revision, rope_scaling, rope_theta, tokenizer_revision, max_model_len, spec_target_max_model_len, quantization, enforce_eager, max_seq_len_to_capture, max_logprobs, disable_sliding_window, skip_tokenizer_init, served_model_name, limit_mm_per_prompt, use_async_output_proc, config_format, hf_overrides, mm_processor_kwargs, disable_mm_preprocessor_cache, override_neuron_config, override_pooler_config, logits_processor_pattern, generation_config, enable_sleep_mode, override_generation_config, model_impl)
362 self.served_model_name = get_served_model_name(model,
363 served_model_name)
--> 364 self.multimodal_config = self._init_multimodal_config(
365 limit_mm_per_prompt)
366 if not self.skip_tokenizer_init:
/usr/local/lib/python3.10/dist-packages/vllm/config.py in _init_multimodal_config(self, limit_mm_per_prompt)
422 ) -> Optional["MultiModalConfig"]:
423 architectures = getattr(self.hf_config, "architectures", [])
--> 424 if ModelRegistry.is_multimodal_model(architectures):
425 return MultiModalConfig(limit_per_prompt=limit_mm_per_prompt or {})
426
/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py in is_multimodal_model(self, architectures)
443 architectures: Union[str, List[str]],
444 ) -> bool:
--> 445 model_cls, _ = self.inspect_model_cls(architectures)
446 return model_cls.supports_multimodal
447
/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py in inspect_model_cls(self, architectures)
403 return (model_info, arch)
404
--> 405 return self._raise_for_unsupported(architectures)
406
407 def resolve_model_cls(
/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/registry.py in _raise_for_unsupported(self, architectures)
355
356 if any(arch in all_supported_archs for arch in architectures):
--> 357 raise ValueError(
358 f"Model architectures {architectures} failed "
359 "to be inspected. Please check the logs for more details.")
ValueError: Model architectures ['Qwen2_5_VLForConditionalGeneration'] failed to be inspected. Please check the logs for more details.
🐛 Describe the bug
Initializing `LLM` for Qwen/Qwen2.5-VL-7B-Instruct fails before any generation happens: vLLM's model registry inspects the `Qwen2_5_VLForConditionalGeneration` architecture in a subprocess, the subprocess crashes with `TypeError: must be called with a dataclass type or instance` (raised from `fields(AttrsDescriptor)` in `torch/_inductor/runtime/hints.py`), and the engine then raises `ValueError: Model architectures ['Qwen2_5_VLForConditionalGeneration'] failed to be inspected.`
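For triage: the traceback shows the crash happens while merely importing `vllm.model_executor.layers.vocab_parallel_embedding`, whose `get_masked_input_and_mask` is decorated with `torch.compile`; that import pulls in `torch._inductor`, and `fields(AttrsDescriptor)` blowing up there looks like a Triton/PyTorch version mismatch (an assumption drawn from the traceback: Triton 3.2 reworked `AttrsDescriptor` so it is no longer the dataclass older torch expects). A quick way to check the pairing:

# Check whether the installed torch and triton versions actually pair up
# (the pairing noted in the comments is an assumption, not verified here).
import torch
import triton

print("torch :", torch.__version__)   # torch 2.5.x is built against triton 3.1.x
print("triton:", triton.__version__)  # triton 3.2.x changed AttrsDescriptor

# If they are mismatched, pinning triton back is the usual remedy, e.g.:
#   pip install triton==3.1.0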