rebase

bigPYJ1151 · bigPYJ1151 · commit 1aaccffe1095 · 2024-06-18T02:50:38.000Z
diff --git a/vllm/worker/cpu_model_runner.py b/vllm/worker/cpu_model_runner.py
@@ -397,7 +397,6 @@ def profile_run(self) -> None:
         model_config = self.model_config
         vlm_config = self.vision_language_config
 
-        # Enable top-k sampling to reflect the accurate memory usage.
         sampling_params = SamplingParams(top_p=0.99, top_k=self.vocab_size - 1)
         max_num_batched_tokens = min(
             self.scheduler_config.max_num_batched_tokens,