We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e910051 commit 1aaccff (Copy full SHA for 1aaccff)
vllm/worker/cpu_model_runner.py
@@ -397,7 +397,6 @@ def profile_run(self) -> None:
397
model_config = self.model_config
398
vlm_config = self.vision_language_config
399
400
- # Enable top-k sampling to reflect the accurate memory usage.
401
sampling_params = SamplingParams(top_p=0.99, top_k=self.vocab_size - 1)
402
max_num_batched_tokens = min(
403
self.scheduler_config.max_num_batched_tokens,
0 commit comments