Commit 2e551b7

[Log] Optimize Startup Log (vllm-project#26601)

yewentao256 authored and 1994 committed
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: 1994 <1994@users.noreply.github.com>

Parent: 401168e

6 files changed: +17 -16 lines

vllm/distributed/device_communicators/cuda_communicator.py (7 additions, 6 deletions)

@@ -13,6 +13,7 @@
 from vllm.distributed.device_communicators.pynccl_allocator import (
     is_symmetric_memory_enabled,
 )
+from vllm.distributed.parallel_state import is_global_first_rank
 from vllm.logger import init_logger
 from vllm.platforms import current_platform

@@ -95,35 +96,35 @@ def __init__(
             from .all2all import NaiveAll2AllManager

             self.all2all_manager = NaiveAll2AllManager(self.cpu_group)
-            logger.info("Using naive all2all manager.")
         elif all2all_backend == "allgather_reducescatter":
             from .all2all import AgRsAll2AllManager

             self.all2all_manager = AgRsAll2AllManager(self.cpu_group)
-            logger.info("Using AllGather-ReduceScatter all2all manager.")
         elif all2all_backend == "pplx":
             from .all2all import PPLXAll2AllManager

             self.all2all_manager = PPLXAll2AllManager(self.cpu_group)
-            logger.info("Using PPLX all2all manager.")
         elif all2all_backend == "deepep_high_throughput":
             from .all2all import DeepEPHTAll2AllManager

             self.all2all_manager = DeepEPHTAll2AllManager(self.cpu_group)
-            logger.info("Using DeepEP High-Throughput all2all manager.")
         elif all2all_backend == "deepep_low_latency":
             from .all2all import DeepEPLLAll2AllManager

             self.all2all_manager = DeepEPLLAll2AllManager(self.cpu_group)
-            logger.info("Using DeepEP Low-Latency all2all manager.")
         elif all2all_backend == "flashinfer_all2allv":
             from .all2all import FlashInferAllToAllManager

             self.all2all_manager = FlashInferAllToAllManager(self.cpu_group)
-            logger.info("Using Flashinfer all2allv manager.")
         else:
             raise ValueError(f"Unknown all2all backend: {all2all_backend}")

+        if is_global_first_rank():
+            logger.info(
+                "Using %s all2all manager.",
+                self.all2all_manager.__class__.__name__,
+            )
+
     def all_reduce(self, input_):
         # since currently we perform copy input -> symm_input -> out-of-place AR
         # return symm_output, we don't need to check if input is symmetric
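
The change above collapses six per-branch logger.info calls into a single message that is emitted only on the global first rank, with the backend name derived from the selected manager class. Below is a minimal standalone sketch of that pattern; the RANK environment check and the placeholder manager class are illustrative stand-ins, not vLLM's actual is_global_first_rank() or all2all code.

import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("startup")


def is_first_rank() -> bool:
    # Illustrative stand-in for vllm.distributed.parallel_state.is_global_first_rank():
    # treat RANK=0 (or an unset RANK) as the global first rank.
    return int(os.environ.get("RANK", "0")) == 0


class NaiveAll2AllManager:
    # Placeholder for the real manager classes selected by the backend string.
    pass


def build_all2all_manager(backend: str):
    if backend == "naive":
        manager = NaiveAll2AllManager()
    else:
        raise ValueError(f"Unknown all2all backend: {backend}")

    # One log line instead of one per branch, emitted by a single rank,
    # with the backend name taken from the chosen class.
    if is_first_rank():
        logger.info("Using %s all2all manager.", manager.__class__.__name__)
    return manager


if __name__ == "__main__":
    build_all2all_manager("naive")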

vllm/distributed/device_communicators/pynccl.py (1 addition, 2 deletions)

@@ -105,11 +105,10 @@ def __init__(
         self.disabled = False

         self.nccl_version = self.nccl.ncclGetRawVersion()
-        logger.info("vLLM is using nccl==%s", self.nccl.ncclGetVersion())
-
         if self.rank == 0:
             # get the unique id from NCCL
             self.unique_id = self.nccl.ncclGetUniqueId()
+            logger.info("vLLM is using nccl==%s", self.nccl.ncclGetVersion())
         else:
             # construct an empty unique id
             self.unique_id = ncclUniqueId()

vllm/utils/__init__.py (1 addition, 1 deletion)

@@ -1144,7 +1144,7 @@ def find_nccl_library() -> str:
         so_file = "librccl.so.1"
     else:
         raise ValueError("NCCL only supports CUDA and ROCm backends.")
-    logger.info("Found nccl from library %s", so_file)
+    logger.debug_once("Found nccl from library %s", so_file)
     return so_file
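
Two of the changes in this commit (here and in cutlass_mla.py below) downgrade messages to logger.debug_once, vLLM's deduplicating log method. As a rough illustration of the "_once" idea only, and not vLLM's actual implementation, a wrapper can remember which format strings it has already emitted:

import logging

logging.basicConfig(level=logging.DEBUG)


class OnceLogger:
    """Illustrative wrapper: emit each (level, format string) pair at most once."""

    def __init__(self, name: str) -> None:
        self._logger = logging.getLogger(name)
        self._seen: set[tuple[int, str]] = set()

    def _log_once(self, level: int, msg: str, *args) -> None:
        key = (level, msg)
        if key in self._seen:
            return
        self._seen.add(key)
        self._logger.log(level, msg, *args)

    def debug_once(self, msg: str, *args) -> None:
        self._log_once(logging.DEBUG, msg, *args)

    def warning_once(self, msg: str, *args) -> None:
        self._log_once(logging.WARNING, msg, *args)


logger = OnceLogger("nccl")
logger.debug_once("Found nccl from library %s", "libnccl.so.2")
logger.debug_once("Found nccl from library %s", "libnccl.so.2")  # suppressed: already emitted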

vllm/v1/attention/backends/mla/cutlass_mla.py (1 addition, 1 deletion)

@@ -139,7 +139,7 @@ def __init__(
         # FORCE_NUM_KV_SPLITS=1
         force_num_kv_splits = os.environ.get("FORCE_NUM_KV_SPLITS", None)
         if force_num_kv_splits:
-            logger.warning_once("Forcing num_kv_splits to %d", int(force_num_kv_splits))
+            logger.debug_once("Forcing num_kv_splits to %d", int(force_num_kv_splits))
             self._num_kv_splits = int(force_num_kv_splits)
         else:
             self._num_kv_splits = -1  # => Auto-detect

vllm/v1/engine/core.py (7 additions, 5 deletions)

@@ -19,6 +19,7 @@

 from vllm.config import ParallelConfig, VllmConfig
 from vllm.distributed import stateless_destroy_torch_distributed_process_group
+from vllm.distributed.parallel_state import is_global_first_rank
 from vllm.logger import init_logger
 from vllm.logging_utils.dump_input import dump_engine_exception
 from vllm.lora.request import LoRARequest

@@ -91,11 +92,12 @@ def __init__(
         load_general_plugins()

         self.vllm_config = vllm_config
-        logger.info(
-            "Initializing a V1 LLM engine (v%s) with config: %s",
-            VLLM_VERSION,
-            vllm_config,
-        )
+        if is_global_first_rank():
+            logger.info(
+                "Initializing a V1 LLM engine (v%s) with config: %s",
+                VLLM_VERSION,
+                vllm_config,
+            )

         self.log_stats = log_stats

vllm/v1/worker/gpu_model_runner.py (0 additions, 1 deletion)

@@ -2876,7 +2876,6 @@ def load_model(self, eep_scale_up: bool = False) -> None:
         with DeviceMemoryProfiler() as m:
             time_before_load = time.perf_counter()
             model_loader = get_model_loader(self.load_config)
-            logger.info("Loading model from scratch...")
             self.model = model_loader.load_model(
                 vllm_config=self.vllm_config, model_config=self.model_config
             )
