refactor(gpu_model_runner): move _scv_enabled method to follow initialization code

yuz207 · terragon-labs[bot] · yuz207 · commit 4fdd1a895b57 · 2025-10-15T01:25:01.000Z
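The _scv_enabled method was relocated within the GPUModelRunner class so that it follows the end of the __init__ body instead of interrupting it. Note that this is not a purely cosmetic move: before this change the method definition sat in the middle of the initialization code, so the cached-output setup that came after it (_draft_token_ids, transfer_event, sampled_token_ids_pinned_cpu) was parsed as unreachable statements following _scv_enabled's return rather than as part of __init__. After this change those attributes are initialized in __init__ again. The "# Cached outputs." comment that preceded them is removed by this commit.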

Co-authored-by: terragon-labs[bot] <terragon-labs[bot]@users.noreply.github.com>
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
@@ -512,16 +512,6 @@ def __init__(
         self._latest_nwor_window_metrics: dict[str, int | str] | None = None
         self._scv_mode = envs.VLLM_SCV_MODE.lower()
         self._scv_graph_executor: SCVGraphExecutor | None = None
-
-    def _scv_enabled(self) -> bool:
-        if not hasattr(self, "_scv_mode"):
-            self._scv_mode = envs.VLLM_SCV_MODE.lower()
-        if self._scv_mode not in ("off", "graph", "adaptive"):
-            logger.warning("SCV: unsupported mode '%s', disabling.", self._scv_mode)
-            self._scv_mode = "off"
-        return self._scv_mode != "off"
-
-        # Cached outputs.
         self._draft_token_ids: list[list[int]] | torch.Tensor | None = None
         self.transfer_event = torch.cuda.Event()
         self.sampled_token_ids_pinned_cpu = torch.empty(
@@ -531,6 +521,14 @@ def _scv_enabled(self) -> bool:
             pin_memory=self.pin_memory,
         )
 
+    def _scv_enabled(self) -> bool:
+        if not hasattr(self, "_scv_mode"):
+            self._scv_mode = envs.VLLM_SCV_MODE.lower()
+        if self._scv_mode not in ("off", "graph", "adaptive"):
+            logger.warning("SCV: unsupported mode '%s', disabling.", self._scv_mode)
+            self._scv_mode = "off"
+        return self._scv_mode != "off"
+
     def reset_mm_cache(self) -> None:
         if self.mm_budget:
             self.mm_budget.reset_cache()