Skip to content

Commit

Permalink
[bugfix] torch profiler bug for single gpu with GPUExecutor (vllm-pro…
Browse files Browse the repository at this point in the history
  • Loading branch information
SolitaryThinker authored and siddharth9820 committed Sep 30, 2024
1 parent 348994d commit 822d69c
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 5 deletions.
2 changes: 1 addition & 1 deletion examples/offline_inference_with_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Create an LLM.
llm = LLM(model="facebook/opt-125m")
llm = LLM(model="facebook/opt-125m", tensor_parallel_size=1)

llm.start_profile()

Expand Down
15 changes: 13 additions & 2 deletions vllm/engine/async_llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from vllm.engine.llm_engine import LLMEngine, SchedulerOutputState
from vllm.engine.metrics_types import StatLoggerBase
from vllm.executor.executor_base import ExecutorAsyncBase
from vllm.executor.gpu_executor import GPUExecutorAsync
from vllm.executor.ray_utils import initialize_ray_cluster
from vllm.inputs import PromptInputs
from vllm.logger import init_logger
Expand Down Expand Up @@ -1019,7 +1020,17 @@ def remove_logger(self, logger_name: str) -> None:
self.engine.remove_logger(logger_name=logger_name)

async def start_profile(self) -> None:
self.engine.model_executor._run_workers("start_profile")
# Compare the exact type rather than using isinstance() so that
# subclasses of GPUExecutorAsync are not matched.
if type(self.engine.model_executor) == GPUExecutorAsync:
self.engine.model_executor.start_profile()
else:
self.engine.model_executor._run_workers("start_profile")

async def stop_profile(self) -> None:
self.engine.model_executor._run_workers("stop_profile")
# Compare the exact type rather than using isinstance() so that
# subclasses of GPUExecutorAsync are not matched.
if type(self.engine.model_executor) == GPUExecutorAsync:
self.engine.model_executor.stop_profile()
else:
self.engine.model_executor._run_workers("stop_profile")
15 changes: 13 additions & 2 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from vllm.engine.output_processor.stop_checker import StopChecker
from vllm.engine.output_processor.util import create_output_by_sequence_group
from vllm.executor.executor_base import ExecutorBase
from vllm.executor.gpu_executor import GPUExecutor
from vllm.executor.ray_utils import initialize_ray_cluster
from vllm.inputs import (INPUT_REGISTRY, EncoderDecoderLLMInputs,
InputRegistry, LLMInputs, PromptInputs)
Expand Down Expand Up @@ -1597,10 +1598,20 @@ def check_health(self) -> None:
self.model_executor.check_health()

def start_profile(self) -> None:
self.model_executor.start_profile()
# Compare the exact type rather than using isinstance() so that
# subclasses (MultiprocessingGPUExecutor) are not matched.
if type(self.model_executor) == GPUExecutor:
self.model_executor.start_profile()
else:
self.model_executor._run_workers("start_profile")

def stop_profile(self) -> None:
self.model_executor.stop_profile()
# Compare the exact type rather than using isinstance() so that
# subclasses (MultiprocessingGPUExecutor) are not matched.
if type(self.model_executor) == GPUExecutor:
self.model_executor.stop_profile()
else:
self.model_executor._run_workers("stop_profile")

def is_tracing_enabled(self) -> bool:
return self.tracer is not None
Expand Down

0 comments on commit 822d69c

Please sign in to comment.