2 changes: 1 addition & 1 deletion vllm/inputs/registry.py
@@ -330,7 +330,7 @@ def dummy_data_for_profiling(
         from vllm.multimodal import MultiModalKwargs
         from vllm.multimodal.profiling import MultiModalProfiler
 
-        if mm_registry.has_processor(model_config):
+        if False and mm_registry.has_processor(model_config):
             tokenizer = cached_tokenizer_from_config(model_config)
             processor = mm_registry.create_processor(model_config,
                                                      tokenizer,
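Note on the hunk above: prefixing the condition with "False and" short-circuits it, so the processor-based profiling path becomes unreachable and dummy-data profiling always falls through to the legacy path. This reads as a temporary debugging toggle rather than a final change. A minimal Python sketch of the short-circuit, with a hypothetical has_processor stub standing in for mm_registry.has_processor:

def has_processor(cfg) -> bool:
    # Hypothetical stand-in for mm_registry.has_processor(model_config).
    return True

cfg = object()
if False and has_processor(cfg):
    print("processor-based dummy-data path")  # dead: never taken
else:
    print("legacy input-mapper path")  # always taken
# has_processor() is never even called, since `and` stops at False.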
4 changes: 3 additions & 1 deletion vllm/model_executor/models/chatglm.py
@@ -212,7 +212,9 @@ def __init__(
             config.hidden_size, eps=config.layernorm_epsilon)
 
         # MLP
-        self.mlp = GLMMLP(config, quant_config, prefix=f"{prefix}.mlp")
+        self.mlp = GLMMLP(config, layer, quant_config, prefix=f"{prefix}.mlp")
+
+        self.layer = layer
 
     def forward(
         self,
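Note on the hunk above: GLMMLP now receives the layer index, and the block records it as self.layer. A sketch of the pattern this enables, per-layer behavior keyed on the index; the module body here is hypothetical, only the extra layer argument comes from the diff:

import torch.nn as nn

class GLMMLP(nn.Module):
    # Hypothetical sketch; only the `layer` parameter is taken from the diff.
    def __init__(self, config, layer: int, quant_config=None, prefix: str = ""):
        super().__init__()
        self.layer = layer  # layer index, usable for per-layer quant/logging
        self.proj_in = nn.Linear(config.hidden_size, config.ffn_hidden_size)
        self.proj_out = nn.Linear(config.ffn_hidden_size, config.hidden_size)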
6 changes: 3 additions & 3 deletions vllm/model_executor/models/minicpmv.py
@@ -1483,7 +1483,7 @@ def __new__(cls, *, vllm_config: VllmConfig, prefix: str = ""):

         # quant_config references base class members,
         # so update values before init is called
-        cls.packed_modules_mapping.update(instance_cls.packed_modules_mapping)
-        cls.embedding_modules.update(instance_cls.embedding_modules)
-        cls.embedding_padding_modules += instance_cls.embedding_padding_modules
+        # cls.packed_modules_mapping.update(instance_cls.packed_modules_mapping)
+        # cls.embedding_modules.update(instance_cls.embedding_modules)
+        # cls.embedding_padding_modules += instance_cls.embedding_padding_modules
         return instance_cls(vllm_config=vllm_config, prefix=prefix)
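Note on the hunk above: the commented-out lines mutated dictionaries that live on the base class. Because packed_modules_mapping and embedding_modules are mutable class attributes, calling update() on them changes shared state that every subclass (and every later instantiation) observes. A minimal sketch of that pitfall, with hypothetical names:

class Base:
    packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}

class VariantA(Base):
    pass

class VariantB(Base):
    pass

# update() mutates the one dict stored on Base, not a per-class copy...
VariantA.packed_modules_mapping.update({"gate_up_proj": ["gate_proj", "up_proj"]})

# ...so the change is visible from every other subclass as well:
print(VariantB.packed_modules_mapping)  # includes "gate_up_proj" too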
1 change: 1 addition & 0 deletions vllm/model_executor/models/registry.py
@@ -104,6 +104,7 @@
"Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"),
"SolarForCausalLM": ("solar", "SolarForCausalLM"),
"TeleChat2ForCausalLM": ("telechat2", "TeleChat2ForCausalLM"),
"TeleChatForCausalLM": ("telechat2", "TeleChat2ForCausalLM"),
"XverseForCausalLM": ("llama", "LlamaForCausalLM"),
"Zamba2ForCausalLM": ("zamba2", "Zamba2ForCausalLM"),
# [Encoder-decoder]
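Note on the hunk above: the new entry is an alias; both architecture strings now resolve to the same (module, class) pair, so checkpoints whose config.json reports TeleChatForCausalLM load the telechat2 implementation. A sketch of the lookup this table supports; the resolve() helper is hypothetical, the mapping entries are from the diff:

_MODELS = {
    "TeleChat2ForCausalLM": ("telechat2", "TeleChat2ForCausalLM"),
    "TeleChatForCausalLM": ("telechat2", "TeleChat2ForCausalLM"),
}

def resolve(architecture: str) -> tuple:
    # Hypothetical helper: returns the module path and class name to import.
    module_name, class_name = _MODELS[architecture]
    return f"vllm.model_executor.models.{module_name}", class_name

print(resolve("TeleChatForCausalLM"))  # same target as TeleChat2ForCausalLM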
4 changes: 2 additions & 2 deletions vllm/model_executor/models/telechat2.py
@@ -44,9 +44,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         for layer in self.layers:
             if not isinstance(layer, PPMissingLayer):
                 layer.self_attn.qkv_proj.bias = None
-                layer.self_attn.qkv_proj.skip_bias_add = True
+                #layer.self_attn.qkv_proj.skip_bias_add = True
                 layer.mlp.gate_up_proj.bias = None
-                layer.mlp.gate_up_proj.skip_bias_add = True
+                #layer.mlp.gate_up_proj.skip_bias_add = True
 
     def load_weights(self, weights: Iterable[Tuple[str,
                                                    torch.Tensor]]) -> Set[str]:
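Note on the hunk above: in vLLM's parallel linear layers, skip_bias_add=True makes forward() return the bias separately instead of adding it, leaving the add to the caller. Since the bias is forced to None just before, commenting the flag out keeps the default path, where a missing bias is simply not added. A minimal sketch of that contract, assuming the vLLM-style (output, output_bias) return convention:

import torch

def linear_forward(x: torch.Tensor, weight: torch.Tensor, bias,
                   skip_bias_add: bool = False):
    # Sketch of the convention: forward returns (output, output_bias).
    out = x @ weight.t()
    if skip_bias_add:
        return out, bias  # caller adds (or fuses) the bias later
    if bias is not None:
        out = out + bias
    return out, None  # bias already applied, or absent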
28 changes: 20 additions & 8 deletions vllm/worker/xpu_model_runner.py
@@ -156,6 +156,7 @@ def add_seq_group(self, seq_group_metadata: SequenceGroupMetadata):
     def build(self) -> ModelInputForXPU:
         input_tokens: List[int] = []
         input_positions: List[int] = []
+        input_mrope_positions: List[List[int]] = [[] for _ in range(3)]
         slot_mapping: List[int] = []
 
         seq_lens: List[int] = []
@@ -249,7 +250,7 @@ def build(self) -> ModelInputForXPU:
                 input_positions.append(position)
             if is_prompt:
                 mm_data = seq_group_metadata.multi_modal_data
-                if mm_data and not self.runner.model_is_mrope:
+                if mm_data and not self.runner.model_is_mrope and not self.runner.mm_registry.has_processor(self.runner.model_config):
                     mm_kwargs = self.multi_modal_input_mapper(mm_data)
                 else:
                     mm_kwargs = mm_data
@@ -262,19 +263,18 @@
"mrope embedding type requires multi-modal input mapper "
"returns 'image_grid_thw' or 'video_grid_thw'.")

second_per_grid_ts = mm_kwargs.get("second_per_grid_ts", None)
hf_config = self.runner.model_config.hf_config
token_ids = seq_data.get_token_ids()
temp_mrope_input_positions, mrope_position_delta = \
MRotaryEmbedding.get_input_positions(
token_ids,
hf_config=hf_config,
image_grid_thw=image_grid_thw,
video_grid_thw=video_grid_thw,
image_token_id=hf_config.image_token_id,
video_token_id=hf_config.video_token_id,
vision_start_token_id=hf_config.vision_start_token_id,
vision_end_token_id=hf_config.vision_end_token_id,
spatial_merge_size=hf_config.vision_config.spatial_merge_size,
context_len=0,
second_per_grid_ts=second_per_grid_ts,
seq_len=seq_len,
context_len=context_len,
)
seq_data.mrope_position_delta = mrope_position_delta
if mrope_input_positions is None:
@@ -284,6 +284,16 @@
                         # for _seq_mrope_input_positions in msections:
                         mrope_input_positions[idx].extend(
                             temp_mrope_input_positions[idx])
+            else:
+                if seq_data.mrope_position_delta is not None:
+                    context_len = seq_data.get_num_computed_tokens()
+                    next_pos = MRotaryEmbedding.get_next_input_positions(
+                        seq_data.mrope_position_delta,
+                        context_len,
+                        seq_len,
+                    )
+                    for idx in range(3):
+                        input_mrope_positions[idx].extend(next_pos[idx])
             if is_prompt:
                 assert len(seq_ids) == 1
                 num_prefills += 1
@@ -334,7 +344,9 @@
                                           dtype=torch.long,
                                           device=self.device)
 
-        if self.runner.model_is_mrope and mrope_input_positions is not None:
+        if self.runner.model_is_mrope and (mrope_input_positions is not None or any(input_mrope_positions)):
+            if any(input_mrope_positions):
+                mrope_input_positions = input_mrope_positions
             input_positions_tensor = torch.tensor(mrope_input_positions,
                                                   dtype=torch.long,
                                                   device=self.device)
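Note on the xpu_model_runner changes as a whole: they mirror the GPU runner's M-RoPE handling. Prefill builds full 3-section (temporal/height/width) positions via MRotaryEmbedding.get_input_positions, while decode advances positions using the per-sequence mrope_position_delta cached at prefill time; the final hunk swaps the decode-side input_mrope_positions in before building the positions tensor. A sketch of what get_next_input_positions computes for the decode branch, assuming it matches the upstream rotary-embedding implementation:

from typing import List

def get_next_input_positions(mrope_position_delta: int, context_len: int,
                             seq_len: int) -> List[List[int]]:
    # During decode all three M-RoPE sections advance together by the same
    # per-sequence delta that was computed during prefill.
    return [
        list(range(context_len + mrope_position_delta,
                   seq_len + mrope_position_delta))
        for _ in range(3)
    ]

# Example: delta=-4, context_len=10, seq_len=11 -> [[6], [6], [6]]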