
Commit bddef4d

llama models fix
Signed-off-by: Jinheng Li <ahengljh@gmail.com>
1 parent: 6001521

File tree: 1 file changed (+2 −7 lines)
vllm/model_executor/models/llama.py

Lines changed: 2 additions & 7 deletions
@@ -48,7 +48,7 @@
     DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding)
 from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
-from vllm.platforms import current_platform
+from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
 
 from .interfaces import SupportsEagle3, SupportsLoRA, SupportsPP
@@ -541,12 +541,7 @@ def __init__(self,
             self.unpadded_vocab_size,
             config.hidden_size,
             org_num_embeddings=config.vocab_size,
-            padding_size=(
-                DEFAULT_VOCAB_PADDING_SIZE
-                # We need bigger padding if using lora for kernel
-                # compatibility
-                if not lora_config else
-                current_platform.get_lora_vocab_padding_size()),
+            padding_size=DEFAULT_VOCAB_PADDING_SIZE,
             quant_config=quant_config,
             prefix=maybe_prefix(prefix, "lm_head"),
         )
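For context, the behavioral change in the second hunk is that the LM head's vocabulary padding no longer depends on whether LoRA is configured: the commit drops the platform-specific LoRA padding branch and always uses DEFAULT_VOCAB_PADDING_SIZE. The sketch below isolates just that selection logic using the symbols that appear in the diff; the concrete padding value and function bodies are illustrative assumptions, not the full ParallelLMHead construction.

# Sketch of the padding_size selection before and after this commit.
# Names mirror the diff; the numeric value below is an illustrative assumption.

DEFAULT_VOCAB_PADDING_SIZE = 64  # padding multiple used by vLLM's vocab layers

def padding_size_before(lora_config, current_platform):
    # Old behavior: LoRA kernels may need a larger padding multiple,
    # so a platform-specific value was used whenever LoRA was enabled.
    if not lora_config:
        return DEFAULT_VOCAB_PADDING_SIZE
    return current_platform.get_lora_vocab_padding_size()

def padding_size_after(lora_config=None):
    # New behavior: always pad the lm_head vocabulary to the default
    # multiple, regardless of whether LoRA is configured.
    return DEFAULT_VOCAB_PADDING_SIZE

The first hunk correspondingly swaps the now-unused current_platform import for the SamplingMetadata import used elsewhere in llama.py.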
