Skip to content

Commit 6c728f7

Browse files
authored
[Chore] Separate out NCCL utilities from vllm.utils (#27197)
Signed-off-by: dongbo910220 <1275604947@qq.com>
1 parent 80e9452 commit 6c728f7

File tree

4 files changed

+66
-87
lines changed

4 files changed

+66
-87
lines changed

vllm/distributed/device_communicators/pynccl_allocator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator
1515
from vllm.logger import init_logger
1616
from vllm.platforms import current_platform
17-
from vllm.utils import find_nccl_include_paths
17+
from vllm.utils.nccl import find_nccl_include_paths
1818

1919
logger = init_logger(__name__)
2020

vllm/distributed/device_communicators/pynccl_wrapper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
from vllm import envs
3434
from vllm.logger import init_logger
3535
from vllm.platforms import current_platform
36-
from vllm.utils import find_nccl_library
36+
from vllm.utils.nccl import find_nccl_library
3737

3838
logger = init_logger(__name__)
3939

vllm/utils/__init__.py

Lines changed: 0 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import multiprocessing
1212
import os
1313
import signal
14-
import subprocess
1514
import sys
1615
import tempfile
1716
import textwrap
@@ -211,90 +210,6 @@ def init_cached_hf_modules() -> None:
211210
init_hf_modules()
212211

213212

214-
@cache
215-
def find_library(lib_name: str) -> str:
216-
"""
217-
Find the library file in the system.
218-
`lib_name` is full filename, with both prefix and suffix.
219-
This function resolves `lib_name` to the full path of the library.
220-
"""
221-
# Adapted from https://github.com/openai/triton/blob/main/third_party/nvidia/backend/driver.py#L19 # noqa
222-
# According to https://en.wikipedia.org/wiki/Filesystem_Hierarchy_Standard
223-
# `/sbin/ldconfig` should exist in all Linux systems.
224-
# `/sbin/ldconfig` searches the library in the system
225-
libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode()
226-
# each line looks like the following:
227-
# libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1
228-
locs = [line.split()[-1] for line in libs.splitlines() if lib_name in line]
229-
# `LD_LIBRARY_PATH` searches the library in the user-defined paths
230-
env_ld_library_path = envs.LD_LIBRARY_PATH
231-
if not locs and env_ld_library_path:
232-
locs = [
233-
os.path.join(dir, lib_name)
234-
for dir in env_ld_library_path.split(":")
235-
if os.path.exists(os.path.join(dir, lib_name))
236-
]
237-
if not locs:
238-
raise ValueError(f"Cannot find {lib_name} in the system.")
239-
return locs[0]
240-
241-
242-
def find_nccl_library() -> str:
243-
"""
244-
We either use the library file specified by the `VLLM_NCCL_SO_PATH`
245-
environment variable, or we find the library file brought by PyTorch.
246-
After importing `torch`, `libnccl.so.2` or `librccl.so.1` can be
247-
found by `ctypes` automatically.
248-
"""
249-
so_file = envs.VLLM_NCCL_SO_PATH
250-
251-
# manually load the nccl library
252-
if so_file:
253-
logger.info(
254-
"Found nccl from environment variable VLLM_NCCL_SO_PATH=%s", so_file
255-
)
256-
else:
257-
if torch.version.cuda is not None:
258-
so_file = "libnccl.so.2"
259-
elif torch.version.hip is not None:
260-
so_file = "librccl.so.1"
261-
else:
262-
raise ValueError("NCCL only supports CUDA and ROCm backends.")
263-
logger.debug_once("Found nccl from library %s", so_file)
264-
return so_file
265-
266-
267-
def find_nccl_include_paths() -> list[str] | None:
268-
"""
269-
We either use the nccl.h specified by the `VLLM_NCCL_INCLUDE_PATH`
270-
environment variable, or we find the library file brought by
271-
nvidia-nccl-cuXX. load_inline by default uses
272-
torch.utils.cpp_extension.include_paths
273-
"""
274-
paths: list[str] = []
275-
inc = envs.VLLM_NCCL_INCLUDE_PATH
276-
if inc and os.path.isdir(inc):
277-
paths.append(inc)
278-
279-
try:
280-
spec = importlib.util.find_spec("nvidia.nccl")
281-
if spec and getattr(spec, "submodule_search_locations", None):
282-
for loc in spec.submodule_search_locations:
283-
inc_dir = os.path.join(loc, "include")
284-
if os.path.exists(os.path.join(inc_dir, "nccl.h")):
285-
paths.append(inc_dir)
286-
except Exception:
287-
pass
288-
289-
seen = set()
290-
out: list[str] = []
291-
for p in paths:
292-
if p and p not in seen:
293-
out.append(p)
294-
seen.add(p)
295-
return out or None
296-
297-
298213
def enable_trace_function_call_for_thread(vllm_config: VllmConfig) -> None:
299214
"""Set up function tracing for the current thread,
300215
if enabled via the VLLM_TRACE_FUNCTION environment variable

vllm/utils/nccl.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
4+
from __future__ import annotations
5+
6+
import importlib
7+
import os
8+
9+
import torch
10+
11+
import vllm.envs as envs
12+
from vllm.logger import init_logger
13+
14+
logger = init_logger(__name__)
15+
16+
17+
def find_nccl_library() -> str:
    """Pick the NCCL/RCCL shared library that should be loaded.

    A non-empty `VLLM_NCCL_SO_PATH` always wins. Otherwise the library
    name is derived from the torch build: `libnccl.so.2` when torch reports
    a CUDA version, `librccl.so.1` when it reports a HIP version.

    Returns:
        The value of `VLLM_NCCL_SO_PATH` if set, else the default library
        name for the detected backend.

    Raises:
        ValueError: If torch reports neither a CUDA nor a HIP version.
    """
    override = envs.VLLM_NCCL_SO_PATH
    if override:
        # An explicit override from the environment takes precedence.
        logger.info(
            "Found nccl from environment variable VLLM_NCCL_SO_PATH=%s", override
        )
        return override

    if torch.version.cuda is not None:
        default_lib = "libnccl.so.2"
    elif torch.version.hip is not None:
        default_lib = "librccl.so.1"
    else:
        raise ValueError("NCCL only supports CUDA and ROCm backends.")

    logger.debug_once("Found nccl from library %s", default_lib)
    return default_lib
36+
37+
38+
def find_nccl_include_paths() -> list[str] | None:
    """Return candidate include directories that may contain `nccl.h`.

    Two sources are consulted, in this order:

    1. `VLLM_NCCL_INCLUDE_PATH`, if it is set and names an existing
       directory.
    2. The `include` directory of every search location of the
       `nvidia.nccl` package, if that directory contains `nccl.h`.

    Returns:
        The de-duplicated list of directories in discovery order, or
        `None` when nothing was found.
    """
    # A bare `import importlib` does not guarantee that the `importlib.util`
    # submodule is loaded; import it explicitly so `find_spec` is always
    # reachable instead of failing with an AttributeError that the
    # best-effort handler below would hide.
    import importlib.util

    paths: list[str] = []
    inc = envs.VLLM_NCCL_INCLUDE_PATH
    if inc and os.path.isdir(inc):
        paths.append(inc)

    # Best-effort lookup: any failure while resolving the package spec is
    # logged at debug level and must not prevent returning the env path.
    try:
        spec = importlib.util.find_spec("nvidia.nccl")
        locations = getattr(spec, "submodule_search_locations", None)
        if spec and locations:
            for loc in locations:
                inc_dir = os.path.join(loc, "include")
                if os.path.exists(os.path.join(inc_dir, "nccl.h")):
                    paths.append(inc_dir)
    except Exception as e:
        logger.debug(
            "Failed to find nccl include path from nvidia.nccl package: %s", e
        )

    # dict.fromkeys drops duplicates while preserving first-seen order.
    unique_paths = [p for p in dict.fromkeys(paths) if p]
    return unique_paths or None

0 commit comments

Comments
 (0)