Skip to content

Commit

Permalink
vlm: add support for Qwen2-VL model (#1015)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlpinDale authored Dec 25, 2024
1 parent be59e30 commit 411ac4f
Show file tree
Hide file tree
Showing 18 changed files with 1,821 additions and 338 deletions.
11 changes: 7 additions & 4 deletions aphrodite/assets/video.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from functools import lru_cache
from typing import List, Literal
from typing import List, Optional

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -68,17 +68,20 @@ def video_to_pil_images_list(

@dataclass(frozen=True)
class VideoAsset:
    """A sample video, addressed by asset name or by a local file path.

    Frames are exposed through the ``pil_images`` and ``np_ndarrays``
    properties. When ``local_path`` is set it is used directly; otherwise
    the file is obtained with ``download_video_asset(name)``.
    """

    # Asset name handed to ``download_video_asset`` when no local file is
    # supplied.
    name: str = "sample_demo_1.mp4"
    # Forwarded unchanged to the frame-extraction helpers; the meaning of
    # the -1 default is defined by those helpers (not visible here).
    num_frames: int = -1
    # Path to a video already on disk; a truthy value bypasses the download.
    local_path: Optional[str] = None

    def _resolve_video_path(self):
        """Return ``local_path`` if set, else the downloaded asset's path."""
        # ``or`` short-circuits: ``download_video_asset`` is never called
        # when a local file was supplied, so no needless fetch happens.
        return self.local_path or download_video_asset(self.name)

    @property
    def pil_images(self) -> List[Image.Image]:
        """Frames as returned by ``video_to_pil_images_list``."""
        return video_to_pil_images_list(self._resolve_video_path(),
                                        self.num_frames)

    @property
    def np_ndarrays(self) -> List[npt.NDArray]:
        """Frames as returned by ``video_to_ndarrays``."""
        return video_to_ndarrays(self._resolve_video_path(),
                                 self.num_frames)
7 changes: 5 additions & 2 deletions aphrodite/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1911,8 +1911,11 @@ def _get_and_verify_max_len(
"Disabling sliding window is not supported for models "
"with rope_scaling. Please raise an issue so we can "
"investigate.")
assert "factor" in rope_scaling
scaling_factor = rope_scaling["factor"]
if rope_type == "mrope":
scaling_factor = 1
else:
assert "factor" in rope_scaling
scaling_factor = rope_scaling["factor"]
if rope_type == "yarn":
derived_max_model_len = rope_scaling[
"original_max_position_embeddings"]
Expand Down
11 changes: 11 additions & 0 deletions aphrodite/common/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ class SequenceData(msgspec.Struct,
# is called.
_new_appended_tokens: List[int] = msgspec.field(default_factory=list)

# It is used to compute mrope_position_ids.
_mrope_position_delta: Optional[int] = None

def __post_init__(self) -> None:
assert self._prompt_token_ids.typecode == "l"
assert self._output_token_ids.typecode == "l"
Expand Down Expand Up @@ -209,6 +212,14 @@ def output_token_ids_array(self) -> array:
assert isinstance(self._output_token_ids, array)
return self._output_token_ids

@property
def mrope_position_delta(self) -> Optional[int]:
    """Return the stored mrope position delta, or None if never set."""
    return self._mrope_position_delta

@mrope_position_delta.setter
def mrope_position_delta(
        self, new_mrope_position_delta: Optional[int]) -> None:
    """Store the delta that is used to compute mrope_position_ids."""
    self._mrope_position_delta = new_mrope_position_delta

def append_token_id(self, token_id: int, logprob: float) -> None:
self._output_token_ids.append(token_id)
self._new_appended_tokens.append(token_id)
Expand Down
8 changes: 7 additions & 1 deletion aphrodite/endpoints/chat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class ConversationMessage(TypedDict, total=False):
"""The tool calls generated by the model, such as function calls."""


ModalityStr = Literal["image", "audio"]
ModalityStr = Literal["image", "audio", "video"]
_T = TypeVar("_T")


Expand Down Expand Up @@ -148,12 +148,18 @@ def _placeholder_str(self, modality: ModalityStr,
hf_config.image_token_index)
if model_type in ("chameleon", "internvl_chat"):
return "<image>"
if model_type == "qwen2_vl":
return "<|vision_start|><|image_pad|><|vision_end|>"

raise TypeError(f"Unknown model type: {model_type}")
elif modality == "audio":
if model_type == "ultravox":
return "<|reserved_special_token_0|>"
raise TypeError(f"Unknown model type: {model_type}")
elif modality == "video":
if model_type == "qwen2_vl":
return "<|vision_start|><|video_pad|><|vision_end|>"
raise TypeError(f"Unknown model type: {model_type}")
else:
raise TypeError(f"Unknown modality: {modality}")

Expand Down
Loading

0 comments on commit 411ac4f

Please sign in to comment.