Skip to content

Commit ac65bc9

Browse files
authored
[platform] add debug logging during inferring the device type (#14195)
Signed-off-by: youkaichao <youkaichao@gmail.com>
1 parent f78c0be commit ac65bc9

File tree

1 file changed

+55
-9
lines changed

1 file changed

+55
-9
lines changed

vllm/platforms/__init__.py

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,22 +32,25 @@ def vllm_version_matches_substr(substr: str) -> bool:
3232

3333
def tpu_platform_plugin() -> Optional[str]:
    """Probe for a TPU environment by attempting to import ``libtpu``.

    Returns:
        ``"vllm.platforms.tpu.TpuPlatform"`` when ``libtpu`` imports
        successfully, otherwise ``None``. The outcome (and the failure
        reason, if any) is reported at debug level.
    """
    logger.debug("Checking if TPU platform is available.")
    try:
        # Installing libtpu on a machine without TPUs is technically
        # possible but very uncommon, so a successful import is treated
        # as equivalent to "this machine has TPUs".
        import libtpu  # noqa: F401
    except Exception as exc:
        logger.debug("TPU platform is not available because: %s", str(exc))
        return None

    logger.debug("Confirmed TPU platform is available.")
    return "vllm.platforms.tpu.TpuPlatform"
4649

4750

4851
def cuda_platform_plugin() -> Optional[str]:
4952
is_cuda = False
50-
53+
logger.debug("Checking if CUDA platform is available.")
5154
try:
5255
from vllm.utils import import_pynvml
5356
pynvml = import_pynvml()
@@ -60,9 +63,19 @@ def cuda_platform_plugin() -> Optional[str]:
6063
# on a GPU machine, even if in a cpu build.
6164
is_cuda = (pynvml.nvmlDeviceGetCount() > 0
6265
and not vllm_version_matches_substr("cpu"))
66+
if pynvml.nvmlDeviceGetCount() <= 0:
67+
logger.debug(
68+
"CUDA platform is not available because no GPU is found.")
69+
if vllm_version_matches_substr("cpu"):
70+
logger.debug("CUDA platform is not available because"
71+
" vLLM is built with CPU.")
72+
if is_cuda:
73+
logger.debug("Confirmed CUDA platform is available.")
6374
finally:
6475
pynvml.nvmlShutdown()
6576
except Exception as e:
77+
logger.debug("Exception happens when checking CUDA platform: %s",
78+
str(e))
6679
if "nvml" not in e.__class__.__name__.lower():
6780
# If the error is not related to NVML, re-raise it.
6881
raise e
@@ -75,84 +88,117 @@ def cuda_is_jetson() -> bool:
7588
or os.path.exists("/sys/class/tegra-firmware")
7689

7790
if cuda_is_jetson():
91+
logger.debug("Confirmed CUDA platform is available on Jetson.")
7892
is_cuda = True
93+
else:
94+
logger.debug("CUDA platform is not available because: %s", str(e))
7995

8096
return "vllm.platforms.cuda.CudaPlatform" if is_cuda else None
8197

8298

8399
def rocm_platform_plugin() -> Optional[str]:
    """Probe for a ROCm environment through the ``amdsmi`` bindings.

    ``amdsmi`` is imported and initialised, the processor handles are
    counted, and the library is always shut down again afterwards.

    Returns:
        ``"vllm.platforms.rocm.RocmPlatform"`` when at least one processor
        handle is reported, otherwise ``None``. Every outcome is reported
        at debug level, including the case where the library works but no
        device is present.
    """
    is_rocm = False
    logger.debug("Checking if ROCm platform is available.")
    try:
        import amdsmi
        amdsmi.amdsmi_init()
        try:
            if len(amdsmi.amdsmi_get_processor_handles()) > 0:
                is_rocm = True
                logger.debug("Confirmed ROCm platform is available.")
            else:
                # Without this branch a GPU-less machine with amdsmi
                # installed would leave no trace of why ROCm was rejected.
                logger.debug("ROCm platform is not available because"
                             " no GPU is found.")
        finally:
            # Pair every successful amdsmi_init() with a shutdown.
            amdsmi.amdsmi_shut_down()
    except Exception as e:
        # NOTE(review): an exception raised by amdsmi_shut_down() after a
        # successful probe also lands here, so this message can be logged
        # even though is_rocm is already True.
        logger.debug("ROCm platform is not available because: %s", e)

    return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None
98116

99117

100118
def hpu_platform_plugin() -> Optional[str]:
    """Probe for an HPU environment by locating ``habana_frameworks``.

    The package is only looked up with ``importlib.util.find_spec``; it is
    not imported here.

    Returns:
        ``"vllm.platforms.hpu.HpuPlatform"`` when a module spec for
        ``habana_frameworks`` is found, otherwise ``None``.
    """
    logger.debug("Checking if HPU platform is available.")
    try:
        from importlib import util
        found = util.find_spec('habana_frameworks') is not None
    except Exception as exc:
        logger.debug("HPU platform is not available because: %s", str(exc))
        return None

    if not found:
        logger.debug("HPU platform is not available because "
                     "habana_frameworks is not found.")
        return None

    logger.debug("Confirmed HPU platform is available.")
    return "vllm.platforms.hpu.HpuPlatform"
109134

110135

111136
def xpu_platform_plugin() -> Optional[str]:
    """Probe for an XPU environment via IPEX, oneCCL bindings and torch.

    Returns:
        ``"vllm.platforms.xpu.XPUPlatform"`` when the required packages
        import and ``torch.xpu.is_available()`` is true, otherwise
        ``None``. Every outcome is reported at debug level.
    """
    is_xpu = False
    logger.debug("Checking if XPU platform is available.")
    try:
        # Both extension packages must import; a failure of either one is
        # caught below and reported as "not available".
        import intel_extension_for_pytorch  # noqa: F401
        import oneccl_bindings_for_pytorch  # noqa: F401
        import torch
        if hasattr(torch, 'xpu') and torch.xpu.is_available():
            is_xpu = True
            logger.debug("Confirmed XPU platform is available.")
        else:
            # The imports succeeded but torch exposes no usable XPU; say
            # so explicitly instead of returning None silently.
            logger.debug("XPU platform is not available because"
                         " torch.xpu is not available.")
    except Exception as e:
        logger.debug("XPU platform is not available because: %s", e)

    return "vllm.platforms.xpu.XPUPlatform" if is_xpu else None
125152

126153

127154
def cpu_platform_plugin() -> Optional[str]:
    """Decide whether the CPU platform should be selected.

    The CPU platform is chosen when the vLLM version string contains
    ``"cpu"`` or, failing that, when ``platform.machine()`` starts with
    ``"arm"`` (case-insensitive).

    Returns:
        ``"vllm.platforms.cpu.CpuPlatform"`` when either check passes,
        otherwise ``None``. Every outcome is reported at debug level.
    """
    is_cpu = False
    logger.debug("Checking if CPU platform is available.")
    try:
        is_cpu = vllm_version_matches_substr("cpu")
        if is_cpu:
            logger.debug("Confirmed CPU platform is available because"
                         " vLLM is built with CPU.")
        else:
            import platform
            is_cpu = platform.machine().lower().startswith("arm")
            if is_cpu:
                logger.debug("Confirmed CPU platform is available"
                             " because the machine is ARM.")
            else:
                # Record the negative result too, so the debug trace
                # explains why the CPU platform was skipped.
                logger.debug("CPU platform is not available because"
                             " vLLM is not built with CPU and the"
                             " machine is not ARM.")
    except Exception as e:
        logger.debug("CPU platform is not available because: %s", e)

    return "vllm.platforms.cpu.CpuPlatform" if is_cpu else None
139174

140175

141176
def neuron_platform_plugin() -> Optional[str]:
    """Probe for a Neuron environment by importing ``transformers_neuronx``.

    Only ``ImportError`` is handled; any other exception raised during the
    import propagates to the caller.

    Returns:
        ``"vllm.platforms.neuron.NeuronPlatform"`` when the import
        succeeds, otherwise ``None``.
    """
    logger.debug("Checking if Neuron platform is available.")
    try:
        import transformers_neuronx  # noqa: F401
    except ImportError as exc:
        logger.debug("Neuron platform is not available because: %s",
                     str(exc))
        return None

    logger.debug("Confirmed Neuron platform is available because"
                 " transformers_neuronx is found.")
    return "vllm.platforms.neuron.NeuronPlatform"
150189

151190

152191
def openvino_platform_plugin() -> Optional[str]:
    """Decide whether the OpenVINO platform should be selected.

    The platform is chosen when the vLLM version string contains
    ``"openvino"``. A failure while reading the version is treated as
    "not available" rather than raised.

    Returns:
        ``"vllm.platforms.openvino.OpenVinoPlatform"`` when the version
        matches, otherwise ``None``. Every outcome is reported at debug
        level.
    """
    is_openvino = False
    logger.debug("Checking if OpenVINO platform is available.")
    try:
        is_openvino = vllm_version_matches_substr("openvino")
    except Exception as e:
        # Report the real failure instead of swallowing it and then
        # claiming the build simply lacks OpenVINO.
        logger.debug("OpenVINO platform is not available because: %s", e)
    else:
        if is_openvino:
            logger.debug("Confirmed OpenVINO platform is available"
                         " because vLLM is built with OpenVINO.")
        else:
            logger.debug("OpenVINO platform is not available because"
                         " vLLM is not built with OpenVINO.")

    return "vllm.platforms.openvino.OpenVinoPlatform" if is_openvino else None
158204

0 commit comments

Comments
 (0)