From 06c8f8d885738a6525fd6774186f094e15676054 Mon Sep 17 00:00:00 2001
From: Rui Qiao <161574667+ruisearch42@users.noreply.github.com>
Date: Thu, 27 Feb 2025 09:01:21 -0800
Subject: [PATCH] [bugfix] Fix profiling for RayDistributedExecutor (#13945)

Signed-off-by: Rui Qiao
---
 vllm/executor/ray_distributed_executor.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py
index c3b41d1c11340..2accb9e17f3dd 100644
--- a/vllm/executor/ray_distributed_executor.py
+++ b/vllm/executor/ray_distributed_executor.py
@@ -309,19 +309,24 @@ def sort_by_driver_then_worker_ip(item: RayWorkerMetaData):
             ",".join(map(str, node_gpus[node_id])),
         } for (node_id, _) in worker_node_and_gpu_ids]
 
+        # Environment variables to copy from driver to workers
+        env_vars_to_copy = [
+            "VLLM_ATTENTION_BACKEND", "TPU_CHIPS_PER_HOST_BOUNDS",
+            "TPU_HOST_BOUNDS", "VLLM_USE_V1", "VLLM_TRACE_FUNCTION",
+            "VLLM_TORCH_PROFILER_DIR", "VLLM_TEST_ENABLE_EP"
+        ]
+
+        # Copy existing env vars to each worker's args
         for args in all_args_to_update_environment_variables:
-            # some carry-over env vars from the driver
             # TODO: refactor platform-specific env vars
-            for name in [
-                    "VLLM_ATTENTION_BACKEND",
-                    "TPU_CHIPS_PER_HOST_BOUNDS",
-                    "TPU_HOST_BOUNDS",
-                    "VLLM_USE_V1",
-                    "VLLM_TRACE_FUNCTION",
-            ]:
+            for name in env_vars_to_copy:
                 if name in os.environ:
                     args[name] = os.environ[name]
 
+        logger.info(
+            "Copying the following environment variables to workers: %s",
+            [v for v in env_vars_to_copy if v in os.environ])
+
         self._env_vars_for_all_workers = (
             all_args_to_update_environment_variables)
 