Skip to content

Commit

Permalink
[llm.serving] Fix using uni executor when world size == 1 (#50849) (#…
Browse files Browse the repository at this point in the history
…50863)

Cherry-pick: #50849

Signed-off-by: Gene Su <e870252314@gmail.com>
  • Loading branch information
GeneDer authored Feb 24, 2025
1 parent cd9e467 commit ecd0709
Showing 1 changed file with 7 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -191,15 +191,18 @@ def __init__(self, ipc_path, engine_args, engine_config):
# Adapted from vllm.engine.multiprocessing.engine.MQLLMEngine.from_engine_args
vllm.plugins.load_general_plugins()

executor_class = vllm.engine.llm_engine.LLMEngine._get_executor_cls(
engine_config
)
# Note (genesu): There is a bug in vllm 0.7.2 that forces the use of the uni
# processing executor when world_size is 1. It is fixed by
# https://github.com/vllm-project/vllm/pull/12934, which ships with
# vllm 0.7.3. However, in Ray's llm package, we will enforce the use of the
# ray distributed executor in all cases so it is always compatible with Ray.
from vllm.executor.ray_distributed_executor import RayDistributedExecutor

self.engine = MQLLMEngine(
ipc_path=ipc_path,
use_async_sockets=engine_config.model_config.use_async_output_proc,
vllm_config=engine_config,
executor_class=executor_class,
executor_class=RayDistributedExecutor,
log_requests=not engine_args.disable_log_requests,
log_stats=not engine_args.disable_log_stats,
usage_context=vllm.usage.usage_lib.UsageContext.API_SERVER,
Expand Down

0 comments on commit ecd0709

Please sign in to comment.