Skip to content

[platform] add debug logging during inferring the device type #14195

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 4, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 55 additions & 9 deletions vllm/platforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,25 @@ def vllm_version_matches_substr(substr: str) -> bool:

def tpu_platform_plugin() -> Optional[str]:
    """Detect TPU availability.

    Returns:
        The fully-qualified class path of the TPU platform if a TPU is
        detected, otherwise ``None``.
    """
    is_tpu = False
    logger.debug("Checking if TPU platform is available.")
    try:
        # While it's technically possible to install libtpu on a
        # non-TPU machine, this is a very uncommon scenario. Therefore,
        # we assume that libtpu is installed if and only if the machine
        # has TPUs.
        import libtpu  # noqa: F401
        is_tpu = True
        logger.debug("Confirmed TPU platform is available.")
    except Exception as e:
        # Import failure means no TPU; log the reason for debuggability.
        logger.debug("TPU platform is not available because: %s", str(e))

    return "vllm.platforms.tpu.TpuPlatform" if is_tpu else None


def cuda_platform_plugin() -> Optional[str]:
is_cuda = False

logger.debug("Checking if CUDA platform is available.")
try:
from vllm.utils import import_pynvml
pynvml = import_pynvml()
Expand All @@ -60,9 +63,19 @@ def cuda_platform_plugin() -> Optional[str]:
# on a GPU machine, even if in a cpu build.
is_cuda = (pynvml.nvmlDeviceGetCount() > 0
and not vllm_version_matches_substr("cpu"))
if pynvml.nvmlDeviceGetCount() <= 0:
logger.debug(
"CUDA platform is not available because no GPU is found.")
if vllm_version_matches_substr("cpu"):
logger.debug("CUDA platform is not available because"
" vLLM is built with CPU.")
if is_cuda:
logger.debug("Confirmed CUDA platform is available.")
finally:
pynvml.nvmlShutdown()
except Exception as e:
logger.debug("Exception happens when checking CUDA platform: %s",
str(e))
if "nvml" not in e.__class__.__name__.lower():
# If the error is not related to NVML, re-raise it.
raise e
Expand All @@ -75,84 +88,117 @@ def cuda_is_jetson() -> bool:
or os.path.exists("/sys/class/tegra-firmware")

if cuda_is_jetson():
logger.debug("Confirmed CUDA platform is available on Jetson.")
is_cuda = True
else:
logger.debug("CUDA platform is not available because: %s", str(e))

return "vllm.platforms.cuda.CudaPlatform" if is_cuda else None


def rocm_platform_plugin() -> Optional[str]:
    """Detect AMD ROCm GPU availability via ``amdsmi``.

    Returns:
        The fully-qualified class path of the ROCm platform if at least one
        AMD processor handle is reported, otherwise ``None``.
    """
    is_rocm = False

    logger.debug("Checking if ROCm platform is available.")
    try:
        import amdsmi
        amdsmi.amdsmi_init()
        try:
            if len(amdsmi.amdsmi_get_processor_handles()) > 0:
                is_rocm = True
                logger.debug("Confirmed ROCm platform is available.")
        finally:
            # Always release the amdsmi library handle, even if the
            # processor query above raises.
            amdsmi.amdsmi_shut_down()
    except Exception as e:
        logger.debug("ROCm platform is not available because: %s", str(e))

    return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None


def hpu_platform_plugin() -> Optional[str]:
    """Detect Intel Gaudi (HPU) availability.

    Detection checks whether the ``habana_frameworks`` package is importable.

    Returns:
        The fully-qualified class path of the HPU platform if
        ``habana_frameworks`` is found, otherwise ``None``.
    """
    is_hpu = False
    logger.debug("Checking if HPU platform is available.")
    try:
        # find_spec only locates the package; it does not import it.
        from importlib import util
        is_hpu = util.find_spec('habana_frameworks') is not None
        if is_hpu:
            logger.debug("Confirmed HPU platform is available.")
        else:
            logger.debug("HPU platform is not available because "
                         "habana_frameworks is not found.")
    except Exception as e:
        logger.debug("HPU platform is not available because: %s", str(e))

    return "vllm.platforms.hpu.HpuPlatform" if is_hpu else None


def xpu_platform_plugin() -> Optional[str]:
    """Detect Intel XPU availability.

    Assumes IPEX (and its oneCCL bindings) are installed only on machines
    with XPUs, then confirms with ``torch.xpu.is_available()``.

    Returns:
        The fully-qualified class path of the XPU platform if an XPU is
        detected, otherwise ``None``.
    """
    is_xpu = False

    logger.debug("Checking if XPU platform is available.")
    try:
        # installed IPEX if the machine has XPUs.
        import intel_extension_for_pytorch  # noqa: F401
        import oneccl_bindings_for_pytorch  # noqa: F401
        import torch
        if hasattr(torch, 'xpu') and torch.xpu.is_available():
            is_xpu = True
            logger.debug("Confirmed XPU platform is available.")
    except Exception as e:
        logger.debug("XPU platform is not available because: %s", str(e))

    return "vllm.platforms.xpu.XPUPlatform" if is_xpu else None


def cpu_platform_plugin() -> Optional[str]:
    """Detect whether the CPU platform should be used.

    The CPU platform is selected when vLLM was built as a CPU wheel, or as
    a fallback when the machine architecture is ARM.

    Returns:
        The fully-qualified class path of the CPU platform if selected,
        otherwise ``None``.
    """
    is_cpu = False
    logger.debug("Checking if CPU platform is available.")
    try:
        is_cpu = vllm_version_matches_substr("cpu")
        if is_cpu:
            logger.debug("Confirmed CPU platform is available because"
                         " vLLM is built with CPU.")
        if not is_cpu:
            # Fall back to CPU on ARM machines even for non-CPU builds.
            import platform
            is_cpu = platform.machine().lower().startswith("arm")
            if is_cpu:
                logger.debug("Confirmed CPU platform is available"
                             " because the machine is ARM.")
    except Exception as e:
        logger.debug("CPU platform is not available because: %s", str(e))

    return "vllm.platforms.cpu.CpuPlatform" if is_cpu else None


def neuron_platform_plugin() -> Optional[str]:
    """Detect AWS Neuron availability.

    Detection checks whether ``transformers_neuronx`` is importable.

    Returns:
        The fully-qualified class path of the Neuron platform if
        ``transformers_neuronx`` is found, otherwise ``None``.
    """
    is_neuron = False
    logger.debug("Checking if Neuron platform is available.")
    try:
        import transformers_neuronx  # noqa: F401
        is_neuron = True
        logger.debug("Confirmed Neuron platform is available because"
                     " transformers_neuronx is found.")
    except ImportError as e:
        logger.debug("Neuron platform is not available because: %s", str(e))

    return "vllm.platforms.neuron.NeuronPlatform" if is_neuron else None


def openvino_platform_plugin() -> Optional[str]:
    """Detect whether vLLM was built with OpenVINO.

    Returns:
        The fully-qualified class path of the OpenVINO platform if vLLM is
        an OpenVINO build, otherwise ``None``.
    """
    is_openvino = False
    logger.debug("Checking if OpenVINO platform is available.")
    # Best-effort check: any failure reading the build version simply
    # leaves is_openvino False.
    with suppress(Exception):
        is_openvino = vllm_version_matches_substr("openvino")
    if is_openvino:
        logger.debug("Confirmed OpenVINO platform is available"
                     " because vLLM is built with OpenVINO.")
    if not is_openvino:
        logger.debug("OpenVINO platform is not available because"
                     " vLLM is not built with OpenVINO.")

    return "vllm.platforms.openvino.OpenVinoPlatform" if is_openvino else None

Expand Down