From 4c7aeff39ae419becf7f5c802be6131f5417dc94 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Thu, 26 Sep 2024 19:18:14 -0400 Subject: [PATCH] [Misc] Change dummy profiling and BOS fallback warns to log once (#8820) --- vllm/inputs/preprocess.py | 14 ++++++++------ vllm/inputs/registry.py | 8 ++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py index bee3d1ed75cbb..6d54a07e92cc0 100644 --- a/vllm/inputs/preprocess.py +++ b/vllm/inputs/preprocess.py @@ -8,6 +8,7 @@ from vllm.lora.request import LoRARequest from vllm.prompt_adapter.request import PromptAdapterRequest from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup +from vllm.utils import print_warning_once from .data import (EncoderDecoderLLMInputs, LLMInputs, PromptInputs, SingletonPromptInputs) @@ -71,20 +72,21 @@ def get_decoder_start_token_id(self) -> Optional[int]: ''' if not self.is_encoder_decoder_model(): - logger.warning("Using None for decoder start token id because " - "this is not an encoder/decoder model.") + print_warning_once("Using None for decoder start token id because " + "this is not an encoder/decoder model.") return None if (self.model_config is None or self.model_config.hf_config is None): - logger.warning("Using None for decoder start token id because " - "model config is not available.") + print_warning_once("Using None for decoder start token id because " + "model config is not available.") return None dec_start_token_id = getattr(self.model_config.hf_config, 'decoder_start_token_id', None) if dec_start_token_id is None: - logger.warning("Falling back on for decoder start token id " - "because decoder start token id is not available.") + print_warning_once("Falling back on <BOS> for decoder start " + "token id because decoder start token id is " + "not available.") dec_start_token_id = self.get_bos_token_id() return dec_start_token_id diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/registry.py 
index 159d958ebf671..e494ee1224308 100644 --- a/vllm/inputs/registry.py +++ b/vllm/inputs/registry.py @@ -9,7 +9,7 @@ from typing_extensions import TypeVar from vllm.logger import init_logger -from vllm.utils import get_allowed_kwarg_only_overrides +from vllm.utils import get_allowed_kwarg_only_overrides, print_warning_once from .data import LLMInputs @@ -235,9 +235,9 @@ def dummy_data_for_profiling( num_tokens = seq_data.prompt_token_ids if len(num_tokens) < seq_len: if is_encoder_data: - logger.warning( - "Expected at least %d dummy encoder tokens for profiling, " - "but found %d tokens instead.", seq_len, len(num_tokens)) + print_warning_once( + f"Expected at least {seq_len} dummy encoder tokens for " + f"profiling, but found {len(num_tokens)} tokens instead.") else: raise AssertionError( f"Expected at least {seq_len} dummy tokens for profiling, "