[Model][Bugfix] Implicit model flags and reenable Phi-3-Vision (#5896)
DarkLight1337 authored Jun 27, 2024
1 parent e9d32d0 commit 98cf2ed
Showing 14 changed files with 26 additions and 32 deletions.
2 changes: 0 additions & 2 deletions vllm/model_executor/models/baichuan.py
@@ -295,8 +295,6 @@ def forward(
 
 
 class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "W_pack": ["W_pack"],
         "gate_up_proj": [
2 changes: 0 additions & 2 deletions vllm/model_executor/models/chatglm.py
@@ -325,8 +325,6 @@ def forward(
 
 
 class ChatGLMForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "query_key_value": ["query_key_value"],
         "dense_h_to_4h": ["dense_h_to_4h"]
2 changes: 0 additions & 2 deletions vllm/model_executor/models/gemma.py
@@ -291,8 +291,6 @@ def forward(
 
 
 class GemmaForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
2 changes: 0 additions & 2 deletions vllm/model_executor/models/gpt_bigcode.py
@@ -233,8 +233,6 @@ def forward(
 
 
 class GPTBigCodeForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {"c_attn": ["c_attn"]}
 
     supported_lora_modules = ["c_fc", "c_proj", "wte", "lm_head", "c_attn"]
18 changes: 16 additions & 2 deletions vllm/model_executor/models/interfaces.py
@@ -13,7 +13,14 @@
 class SupportsVision(Protocol):
     """The interface required for all vision language models (VLMs)."""
 
-    supports_vision: ClassVar[Literal[True]]
+    supports_vision: ClassVar[Literal[True]] = True
+    """
+    A flag that indicates this model supports vision inputs.
+
+    Note:
+        There is no need to redefine this flag if this class is in the
+        MRO of your model class.
+    """
 
     def __init__(self, *, vlm_config: VisionLanguageConfig) -> None:
         ...
@@ -52,7 +59,14 @@ def supports_vision(
 class SupportsLoRA(Protocol):
     """The interface required for all models that support LoRA."""
 
-    supports_lora: ClassVar[Literal[True]]
+    supports_lora: ClassVar[Literal[True]] = True
+    """
+    A flag that indicates this model supports LoRA.
+
+    Note:
+        There is no need to redefine this flag if this class is in the
+        MRO of your model class.
+    """
 
     packed_modules_mapping: ClassVar[Dict[str, List[str]]]
     supported_lora_modules: ClassVar[List[str]]
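
The interface change above is what makes the per-model flags implicit: the protocol itself now supplies the value, so any model class that lists SupportsVision or SupportsLoRA among its bases inherits the flag through the MRO. That is why the explicit supports_lora = True and supports_vision = True lines are deleted from every model file in this commit. A minimal runnable sketch of the mechanism, using simplified stand-in names rather than the actual vLLM classes:

from typing import ClassVar, Literal, Protocol


class SupportsLoRA(Protocol):
    """Simplified stand-in for the interface defined above."""

    # The protocol supplies the default, so subclasses do not need to
    # restate the flag as long as this class is in their MRO.
    supports_lora: ClassVar[Literal[True]] = True


class MyModel(SupportsLoRA):
    """Opts in to LoRA support purely by inheriting the interface."""


# Ordinary attribute lookup walks the MRO and finds the inherited flag;
# classes that never inherited the interface simply lack the attribute.
assert getattr(MyModel, "supports_lora", False) is True
assert getattr(object, "supports_lora", False) is False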
2 changes: 0 additions & 2 deletions vllm/model_executor/models/llama.py
@@ -299,8 +299,6 @@ def forward(
 
 
 class LlamaForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
2 changes: 0 additions & 2 deletions vllm/model_executor/models/llava.py
@@ -88,8 +88,6 @@ class LlavaImageFeatureInputs(TypedDict):
 @MULTIMODAL_REGISTRY.register_dummy_data(get_dummy_image_data)
 class LlavaForConditionalGeneration(nn.Module, SupportsVision):
 
-    supports_vision = True
-
     def __init__(self,
                  config: LlavaConfig,
                  vlm_config: VisionLanguageConfig,
2 changes: 0 additions & 2 deletions vllm/model_executor/models/llava_next.py
@@ -108,8 +108,6 @@ def _image_pixel_processor(
 @MULTIMODAL_REGISTRY.register_dummy_data(_get_dummy_image_data)
 class LlavaNextForConditionalGeneration(nn.Module, SupportsVision):
 
-    supports_vision = True
-
     def __init__(self,
                  config: LlavaNextConfig,
                  vlm_config: VisionLanguageConfig,
2 changes: 0 additions & 2 deletions vllm/model_executor/models/minicpm.py
@@ -392,8 +392,6 @@ def forward(
 
 
 class MiniCPMForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
2 changes: 0 additions & 2 deletions vllm/model_executor/models/mixtral.py
@@ -475,8 +475,6 @@ def forward(
 
 
 class MixtralForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     fall_back_to_pt_during_load = False
 
     packed_modules_mapping = {
2 changes: 0 additions & 2 deletions vllm/model_executor/models/phi.py
@@ -232,8 +232,6 @@ def forward(
 
 
 class PhiForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
16 changes: 10 additions & 6 deletions vllm/model_executor/models/phi3v.py
@@ -32,12 +32,13 @@
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.clip import CLIPVisionModel
 from vllm.model_executor.models.llama import LlamaModel
-from vllm.model_executor.models.vlm_base import VisionLanguageModelBase
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.image import ImagePixelData, get_dummy_image_data
 from vllm.sequence import SamplerOutput
 
+from .interfaces import SupportsVision
+
 logger = init_logger(__name__)
 
 _KEYS_TO_MODIFY_MAPPING = {
@@ -317,18 +318,21 @@ def _image_processor(
 
 @MULTIMODAL_REGISTRY.register_image_pixel_input(_image_processor)
 @MULTIMODAL_REGISTRY.register_dummy_data(get_dummy_image_data)
-class Phi3VForCausalLM(VisionLanguageModelBase):
+class Phi3VForCausalLM(nn.Module, SupportsVision):
 
     def __init__(self,
                  config: PretrainedConfig,
-                 vision_language_config: VisionLanguageConfig,
+                 vlm_config: VisionLanguageConfig,
                  cache_config: Optional[CacheConfig] = None,
                  quant_config: Optional[QuantizationConfig] = None) -> None:
-        super().__init__(vision_language_config)
+        super().__init__()
 
         self.config = config
+        self.vlm_config = vlm_config
+
         self.model = LlamaModel(config, cache_config, quant_config)
         self.vision_embed_tokens = Phi3HDImageEmbedding(
-            vision_language_config, config, self.model.embed_tokens)
+            vlm_config, config, self.model.embed_tokens)
         self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size)
         self.logits_processor = LogitsProcessor(config.vocab_size)
         self.sampler = Sampler()
@@ -338,7 +342,7 @@ def _parse_and_validate_image_input(
         pixel_values = kwargs.pop("pixel_values", None)
         image_sizes = kwargs.pop("image_sizes", None)
 
-        expected_input_type = self.vision_language_config.image_input_type
+        expected_input_type = self.vlm_config.image_input_type
         ImageInputType = VisionLanguageConfig.ImageInputType
 
         if expected_input_type != ImageInputType.PIXEL_VALUES:
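
With VisionLanguageModelBase gone, Phi-3-Vision now advertises its capability the same way LLaVA does above, through the inherited supports_vision flag rather than a dedicated base class, which is what allows the model to be re-enabled under flag-based detection. A hedged sketch of such a capability check; the real supports_vision() helper in interfaces.py, whose hunk header appears earlier, may have a different signature:

from typing import Any


def supports_vision(model: Any) -> bool:
    # Duck-typed check that works for classes and instances alike;
    # only types with the interface in their MRO carry the flag.
    return getattr(model, "supports_vision", False) is True


# Hypothetical call site: select the multimodal code path by
# capability, e.g. supports_vision(Phi3VForCausalLM), instead of
# isinstance(model, VisionLanguageModelBase).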
2 changes: 0 additions & 2 deletions vllm/model_executor/models/qwen2.py
@@ -266,8 +266,6 @@ def forward(
 
 
 class Qwen2ForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
2 changes: 0 additions & 2 deletions vllm/model_executor/models/xverse.py
@@ -269,8 +269,6 @@ def forward(
 
 
 class XverseForCausalLM(nn.Module, SupportsLoRA):
-    supports_lora = True
-
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
