Motivation
想要对qwen3-vl-8b进行量化,发现在执行下述操作的时候报错,不确定是还没支持还是命令、环境有问题
#!/bin/bash
# auto_awq.sh - run `lmdeploy lite auto_awq` on the qwen3-vl-8b LoRA-SFT
# checkpoint with 4-bit weights and a group size of 128.
#
# HF_MODEL: model path passed to lmdeploy as the positional argument.
# WORK_DIR: path passed to lmdeploy via --work-dir.
export HF_MODEL=/data02/wangq-bj/server2/docker_model/v4/20260522_1100_qwen3_vl-8b_lora_sft/
export WORK_DIR=/data02/wangq-bj/server2/docker_model/v4/20260526_1000_qwen3_vl-8b_lora_sft-awq

# The trailing backslashes join the invocation into one logical line; without
# them every option would be executed as a separate command. The variables are
# quoted so each path reaches lmdeploy as exactly one argument.
lmdeploy lite auto_awq \
  "$HF_MODEL" \
  --w-bits 4 \
  --w-group-size 128 \
  --calib-samples 128 \
  --calib-seqlen 2048 \
  --batch-size 1 \
  --work-dir "$WORK_DIR"
报错:
(lmdeploy13) wangq-bj@xa-G10-27-49:~/server2/Pipe_fitting_connection_drawing_v2_0630_api$ /bin/bash /data02/wangq-bj/server2/Pipe_fitting_connection_drawing_v2_0630_api/auto_awq.sh
2026-06-11 16:53:12,258 - lmdeploy - INFO - builder.py:74 - matching vision model: Qwen3VLModel
2026-06-11 16:53:13,391 - lmdeploy - ERROR - builder.py:83 - build vision model Qwen3VLModel failed,
Traceback (most recent call last):
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/bin/lmdeploy", line 6, in <module>
sys.exit(run())
^^^^^
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/cli/entrypoint.py", line 39, in run
args.run(args)
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/cli/lite.py", line 115, in auto_awq
auto_awq(**kwargs)
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/lite/apis/auto_awq.py", line 87, in auto_awq
vl_model, model, tokenizer, work_dir = calibrate(model,
^^^^^^^^^^^^^^^^
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/lite/apis/calibrate.py", line 256, in calibrate
vl_model = load_vl_model(model, backend=None, with_llm=True).vl_model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/vl/model/builder.py", line 80, in load_vl_model
model.build_model(trust_remote_code=trust_remote_code)
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/vl/model/base.py", line 92, in build_model
raise NotImplementedError()
NotImplementedError
环境:
Python 3.12.13
Package Version Build
accelerate 1.13.0
addict 2.4.0
aiohappyeyeballs 2.6.2
aiohttp 3.14.1
aiosignal 1.4.0
annotated-doc 0.0.4
annotated-types 0.7.0
anyio 4.13.0
apache-tvm-ffi 0.1.12
attrs 26.1.0
certifi 2026.5.20
charset-normalizer 3.4.7
click 8.4.1
cloudpickle 3.1.2
cuda-bindings 12.9.4
cuda-pathfinder 1.5.5
distro 1.9.0
einops 0.8.2
fastapi 0.136.3
filelock 3.29.3
fire 0.7.1
fla-core 0.5.0
flash-linear-attention 0.5.0
frozenlist 1.8.0
fsspec 2026.4.0
h11 0.16.0
hf-xet 1.5.1
httpcore 1.0.9
httpx 0.28.1
huggingface_hub 1.18.0
idna 3.18
Jinja2 3.1.6
jiter 0.15.0
jsonschema 4.26.0
jsonschema-specifications 2025.9.1
lmdeploy 0.13.0
markdown-it-py 4.2.0
MarkupSafe 3.0.3
mdurl 0.1.2
ml_dtypes 0.5.4
mmengine-lite 0.10.7
mpmath 1.3.0
msgpack 1.2.0
multidict 6.7.1
networkx 3.6.1
numpy 2.4.6
nvidia-cublas-cu12 12.8.4.1
nvidia-cuda-cupti-cu12 12.8.90
nvidia-cuda-nvrtc-cu12 12.8.93
nvidia-cuda-runtime-cu12 12.8.90
nvidia-cudnn-cu12 9.10.2.21
nvidia-cufft-cu12 11.3.3.83
nvidia-cufile-cu12 1.13.1.3
nvidia-curand-cu12 10.3.9.90
nvidia-cusolver-cu12 11.7.3.90
nvidia-cusparse-cu12 12.5.8.93
nvidia-cusparselt-cu12 0.7.1
nvidia-nccl-cu12 2.27.5
nvidia-nvjitlink-cu12 12.8.93
nvidia-nvshmem-cu12 3.4.5
nvidia-nvtx-cu12 12.8.90
openai 2.41.1
openai-harmony 0.0.8
opencv-python-headless 4.13.0.92
packaging 26.0
partial-json-parser 0.2.1.1.post7
peft 0.14.0
pillow 12.2.0
pip 26.1.1
platformdirs 4.10.0
prometheus_client 0.25.0
propcache 0.5.2
protobuf 7.35.0
psutil 7.2.2
pybase64 1.4.3
pydantic 2.13.4
pydantic_core 2.46.4
Pygments 2.20.0
PyYAML 6.0.3
pyzmq 27.1.0
ray 2.55.1
referencing 0.37.0
regex 2026.5.9
requests 2.34.2
rich 15.0.0
rpds-py 2026.5.1
safetensors 0.8.0
sentencepiece 0.2.1
setuptools 82.0.1
shellingham 1.5.4
shortuuid 1.0.13
sniffio 1.3.1
starlette 1.2.1
sympy 1.14.0
termcolor 3.3.0
tiktoken 0.13.0
tilelang 0.1.11
tokenizers 0.22.2
torch 2.10.0 3
torch_c_dlpack_ext 0.1.5
torchvision 0.25.0
tqdm 4.68.2
transformers 5.11.0
triton 3.6.0
typer 0.25.1
typing_extensions 4.15.0
typing-inspection 0.4.2
urllib3 2.7.0
uvicorn 0.49.0
wheel 0.46.3
xgrammar 0.2.1
yapf 0.43.0
yarl 1.24.2
z3-solver 4.15.4.0
Related resources
No response
Additional context
No response
Motivation
想要对qwen3-vl-8b进行量化,发现在执行下述操作的时候报错,不确定是还没支持还是命令、环境有问题
#!/bin/bash
# auto_awq.sh - run `lmdeploy lite auto_awq` on the qwen3-vl-8b LoRA-SFT
# checkpoint with 4-bit weights and a group size of 128.
#
# HF_MODEL: model path passed to lmdeploy as the positional argument.
# WORK_DIR: path passed to lmdeploy via --work-dir.
export HF_MODEL=/data02/wangq-bj/server2/docker_model/v4/20260522_1100_qwen3_vl-8b_lora_sft/
export WORK_DIR=/data02/wangq-bj/server2/docker_model/v4/20260526_1000_qwen3_vl-8b_lora_sft-awq

# The trailing backslashes join the invocation into one logical line; without
# them every option would be executed as a separate command. The variables are
# quoted so each path reaches lmdeploy as exactly one argument.
lmdeploy lite auto_awq \
  "$HF_MODEL" \
  --w-bits 4 \
  --w-group-size 128 \
  --calib-samples 128 \
  --calib-seqlen 2048 \
  --batch-size 1 \
  --work-dir "$WORK_DIR"
报错:
(lmdeploy13) wangq-bj@xa-G10-27-49:~/server2/Pipe_fitting_connection_drawing_v2_0630_api$ /bin/bash /data02/wangq-bj/server2/Pipe_fitting_connection_drawing_v2_0630_api/auto_awq.sh
2026-06-11 16:53:12,258 - lmdeploy - INFO - builder.py:74 - matching vision model: Qwen3VLModel
2026-06-11 16:53:13,391 - lmdeploy - ERROR - builder.py:83 - build vision model Qwen3VLModel failed,
Traceback (most recent call last):
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/bin/lmdeploy", line 6, in <module>
sys.exit(run())
^^^^^
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/cli/entrypoint.py", line 39, in run
args.run(args)
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/cli/lite.py", line 115, in auto_awq
auto_awq(**kwargs)
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/lite/apis/auto_awq.py", line 87, in auto_awq
vl_model, model, tokenizer, work_dir = calibrate(model,
^^^^^^^^^^^^^^^^
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/lite/apis/calibrate.py", line 256, in calibrate
vl_model = load_vl_model(model, backend=None, with_llm=True).vl_model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/vl/model/builder.py", line 80, in load_vl_model
model.build_model(trust_remote_code=trust_remote_code)
File "/data02/wangq-bj/anaconda3/envs/lmdeploy13/lib/python3.12/site-packages/lmdeploy/vl/model/base.py", line 92, in build_model
raise NotImplementedError()
NotImplementedError
环境:
Python 3.12.13
Package Version Build
accelerate 1.13.0
addict 2.4.0
aiohappyeyeballs 2.6.2
aiohttp 3.14.1
aiosignal 1.4.0
annotated-doc 0.0.4
annotated-types 0.7.0
anyio 4.13.0
apache-tvm-ffi 0.1.12
attrs 26.1.0
certifi 2026.5.20
charset-normalizer 3.4.7
click 8.4.1
cloudpickle 3.1.2
cuda-bindings 12.9.4
cuda-pathfinder 1.5.5
distro 1.9.0
einops 0.8.2
fastapi 0.136.3
filelock 3.29.3
fire 0.7.1
fla-core 0.5.0
flash-linear-attention 0.5.0
frozenlist 1.8.0
fsspec 2026.4.0
h11 0.16.0
hf-xet 1.5.1
httpcore 1.0.9
httpx 0.28.1
huggingface_hub 1.18.0
idna 3.18
Jinja2 3.1.6
jiter 0.15.0
jsonschema 4.26.0
jsonschema-specifications 2025.9.1
lmdeploy 0.13.0
markdown-it-py 4.2.0
MarkupSafe 3.0.3
mdurl 0.1.2
ml_dtypes 0.5.4
mmengine-lite 0.10.7
mpmath 1.3.0
msgpack 1.2.0
multidict 6.7.1
networkx 3.6.1
numpy 2.4.6
nvidia-cublas-cu12 12.8.4.1
nvidia-cuda-cupti-cu12 12.8.90
nvidia-cuda-nvrtc-cu12 12.8.93
nvidia-cuda-runtime-cu12 12.8.90
nvidia-cudnn-cu12 9.10.2.21
nvidia-cufft-cu12 11.3.3.83
nvidia-cufile-cu12 1.13.1.3
nvidia-curand-cu12 10.3.9.90
nvidia-cusolver-cu12 11.7.3.90
nvidia-cusparse-cu12 12.5.8.93
nvidia-cusparselt-cu12 0.7.1
nvidia-nccl-cu12 2.27.5
nvidia-nvjitlink-cu12 12.8.93
nvidia-nvshmem-cu12 3.4.5
nvidia-nvtx-cu12 12.8.90
openai 2.41.1
openai-harmony 0.0.8
opencv-python-headless 4.13.0.92
packaging 26.0
partial-json-parser 0.2.1.1.post7
peft 0.14.0
pillow 12.2.0
pip 26.1.1
platformdirs 4.10.0
prometheus_client 0.25.0
propcache 0.5.2
protobuf 7.35.0
psutil 7.2.2
pybase64 1.4.3
pydantic 2.13.4
pydantic_core 2.46.4
Pygments 2.20.0
PyYAML 6.0.3
pyzmq 27.1.0
ray 2.55.1
referencing 0.37.0
regex 2026.5.9
requests 2.34.2
rich 15.0.0
rpds-py 2026.5.1
safetensors 0.8.0
sentencepiece 0.2.1
setuptools 82.0.1
shellingham 1.5.4
shortuuid 1.0.13
sniffio 1.3.1
starlette 1.2.1
sympy 1.14.0
termcolor 3.3.0
tiktoken 0.13.0
tilelang 0.1.11
tokenizers 0.22.2
torch 2.10.0 3
torch_c_dlpack_ext 0.1.5
torchvision 0.25.0
tqdm 4.68.2
transformers 5.11.0
triton 3.6.0
typer 0.25.1
typing_extensions 4.15.0
typing-inspection 0.4.2
urllib3 2.7.0
uvicorn 0.49.0
wheel 0.46.3
xgrammar 0.2.1
yapf 0.43.0
yarl 1.24.2
z3-solver 4.15.4.0
Related resources
No response
Additional context
No response