Skip to content

Commit 2fa2a50

Browse files
authored
[Bugfix] Fix Minicpm-O-int4 GPTQ model inference (vllm-project#17397)
Signed-off-by: Isotr0py <2037008807@qq.com>
1 parent 08e15de commit 2fa2a50

File tree

2 files changed

+36
-2
lines changed

2 files changed

+36
-2
lines changed

vllm/model_executor/models/minicpmo.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,16 @@
2828

2929
import torch
3030
from torch import nn
31-
from transformers import BatchFeature
31+
from transformers import BatchFeature, PretrainedConfig
3232
from transformers.modeling_outputs import BaseModelOutputWithPast
3333
from transformers.models.whisper.modeling_whisper import (
3434
ACT2FN, WHISPER_ATTENTION_CLASSES, WhisperConfig, WhisperEncoder)
3535

3636
from vllm.config import VllmConfig
37+
from vllm.model_executor.layers.quantization import QuantizationConfig
38+
from vllm.model_executor.layers.quantization.gptq import GPTQConfig
39+
from vllm.model_executor.layers.quantization.gptq_marlin import (
40+
GPTQMarlinConfig)
3741
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargs
3842
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
3943
NestedTensors)
@@ -512,6 +516,36 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
512516

513517
self.audio_token_id = None
514518

519+
def _maybe_ignore_quant_config(
        self,
        quant_config: Optional[QuantizationConfig],
) -> Optional[QuantizationConfig]:
    """Drop the quantization config for modules AutoGPTQ leaves unquantized.

    GPTQ configs do not carry a list of ignored modules; however, AutoGPTQ
    seems to avoid vision encoder sections for some models.
    See: https://huggingface.co/openbmb/MiniCPM-o-2_6-int4

    Returns:
        None when ``quant_config`` is a GPTQ-family config (so the affected
        submodule is built unquantized); otherwise the config unchanged.
    """
    # NOTE: annotated Optional on both sides — callers pass a default of
    # None, and this method itself returns None for GPTQ-family configs.
    if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
        return None
    return quant_config
526+
527+
def init_vision_module(
    self,
    config: PretrainedConfig,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
) -> nn.Module:
    """Build the vision tower, bypassing GPTQ quantization.

    MiniCPMO GPTQ model leave vpm unquantized, so a GPTQ-family quant
    config is replaced with None before delegating to the parent class.
    """
    effective_quant_config = self._maybe_ignore_quant_config(quant_config)
    return super().init_vision_module(config, effective_quant_config,
                                      prefix)
536+
537+
def init_resampler(
    self,
    embed_dim: int,
    vision_dim: int,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
) -> nn.Module:
    """Build the resampler, bypassing GPTQ quantization.

    MiniCPMO GPTQ model leave resampler unquantized, so a GPTQ-family
    quant config is replaced with None before delegating to the parent
    class.
    """
    effective_quant_config = self._maybe_ignore_quant_config(quant_config)
    return super().init_resampler(embed_dim, vision_dim,
                                  effective_quant_config, prefix)
548+
515549
def init_audio_module(self, *, vllm_config: VllmConfig, prefix: str = ""):
516550
# Do not use parameters temporarily
517551
audio_config = self.config.audio_config

vllm/model_executor/models/minicpmv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1181,7 +1181,7 @@ def init_llm(
11811181
def init_vision_module(
11821182
self,
11831183
config: PretrainedConfig,
1184-
quant_config: Optional[QuantizationConfig],
1184+
quant_config: Optional[QuantizationConfig] = None,
11851185
prefix: str = "",
11861186
) -> nn.Module:
11871187
model = Idefics2VisionTransformer(config.vision_config,

0 commit comments

Comments
 (0)