Missed one
petersalas committed Sep 12, 2024
1 parent 7074367 · commit e457298
Showing 1 changed file with 2 additions and 1 deletion.
vllm/model_executor/models/utils.py: 2 additions, 1 deletion
@@ -10,6 +10,7 @@
 from vllm.attention.backends.blocksparse_attn import (
     BlocksparseFlashAttentionMetadata)
 from vllm.attention.backends.flash_attn import FlashAttentionMetadata
+from vllm.attention.backends.flashinfer import FlashInferMetadata
 from vllm.attention.backends.rocm_flash_attn import ROCmFlashAttentionMetadata
 from vllm.attention.backends.xformers import XFormersMetadata
 from vllm.config import (CacheConfig, LoRAConfig, MultiModalConfig,
@@ -152,7 +153,7 @@ def merge_partial_multimodal_embeddings(
     if not isinstance(
             prefill_metadata,
             (FlashAttentionMetadata, XFormersMetadata, ROCmFlashAttentionMetadata,
-             BlocksparseFlashAttentionMetadata)):
+             BlocksparseFlashAttentionMetadata, FlashInferMetadata)):
         raise ValueError(
             f"Unsupported Attention backend ({type(prefill_metadata)}) for "
             "partial embedding replacement.")
