Commit 349db64

Using a formatted string, since warning_once does not accept a format string with arguments
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
1 parent 0bfac91
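
Background for the one-line summary above: with the stdlib-style call `logger.warning(msg, *args)`, `%` placeholders in `msg` are substituted lazily by the logging framework. A `warning_once`-style helper that accepts only the message string cannot perform that substitution itself, so the caller has to format eagerly, e.g. with an f-string. A minimal, stdlib-only illustration of the two call styles (the logger name and values below are placeholders, not taken from the commit):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("vllm.multimodal.profiling")

seq_len, total_len = 512, 1024

# Lazy %-style formatting: the logging framework substitutes the
# arguments into the message template when the record is emitted.
logger.warning("profiling length %d < required %d", seq_len, total_len)

# Eager f-string formatting: the message arrives fully rendered,
# which is what a single-argument warning_once helper requires.
logger.warning(f"profiling length {seq_len} < required {total_len}")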

1 file changed: +6 -5 lines changed

vllm/multimodal/profiling.py

Lines changed: 6 additions & 5 deletions
@@ -216,17 +216,18 @@ def get_encoder_dummy_data(
         # Encoder-decoder multimodal models only support v0
         if total_len > seq_len:
             # `max_num_batched_tokens` is defined by `SchedulerConfig`
-            logger.warning(
+            logger.warning_once(
                 "The encoder sequence length used for profiling ("
-                "max_num_batched_tokens / max_num_seqs = %d) is too short "
+                f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
+                " is too short "
                 "to hold the multi-modal embeddings in the worst case "
-                "(%d tokens in total, out of which %s are reserved for "
+                f"({total_len} tokens in total, out of which "
+                f"{total_placeholders_by_modality} are reserved for "
                 "multi-modal embeddings). This may cause certain "
                 "multi-modal inputs to fail during inference, even when "
                 "the input text is short. To avoid this, you should "
                 "increase `max_model_len`, reduce `max_num_seqs`, "
-                "and/or reduce `mm_counts`.", seq_len, total_len,
-                total_placeholders_by_modality)
+                "and/or reduce `mm_counts`.")
 
         processor = cast(EncDecMultiModalProcessor, self.processor)
         if processor.pad_dummy_encoder_prompt:
