
Commit c7fa609

tomeras91 authored and LeiWang1999 committed
[CI/Build] build on empty device for better dev experience (vllm-project#4773)
Signed-off-by: LeiWang1999 <leiwang1999@outlook.com>
1 parent 749757b commit c7fa609

File tree

2 files changed: +21 -7 lines changed

requirements-cuda.txt

Lines changed: 2 additions & 2 deletions
@@ -7,5 +7,5 @@ nvidia-ml-py # for pynvml package
 torch == 2.4.0
 # These must be updated alongside torch
 torchvision == 0.19 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-xformers == 0.0.27.post2 # Requires PyTorch 2.4.0
-vllm-flash-attn == 2.6.1 # Requires PyTorch 2.4.0
+xformers == 0.0.27.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.4.0
+vllm-flash-attn == 2.6.1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.4.0
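The `; platform_system == 'Linux' and platform_machine == 'x86_64'` suffixes are PEP 508 environment markers: pip evaluates them against the installing machine and skips a pinned requirement whose marker is false, which is what lets a macOS or Windows checkout still resolve requirements-cuda.txt. A small illustration using the packaging library:

    from packaging.markers import Marker

    marker = Marker("platform_system == 'Linux' and platform_machine == 'x86_64'")

    # Evaluated against the current interpreter's environment:
    print(marker.evaluate())

    # Overriding fields to mimic, say, a macOS/arm64 machine:
    print(marker.evaluate({"platform_system": "Darwin",
                           "platform_machine": "arm64"}))  # False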

setup.py

Lines changed: 19 additions & 5 deletions
@@ -61,9 +61,12 @@ def embed_commit_hash():
 
 VLLM_TARGET_DEVICE = envs.VLLM_TARGET_DEVICE
 
-# vLLM only supports Linux platform
-assert sys.platform.startswith(
-    "linux"), "vLLM only supports Linux platform (including WSL)."
+if not sys.platform.startswith("linux"):
+    logger.warning(
+        "vLLM only supports Linux platform (including WSL). "
+        "Building on %s, "
+        "so vLLM may not be able to run correctly", sys.platform)
+    VLLM_TARGET_DEVICE = "empty"
 
 MAIN_CUDA_VERSION = "12.1"
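VLLM_TARGET_DEVICE itself comes from vLLM's envs module, i.e. from an environment variable; this hunk only overrides it on non-Linux hosts. A rough stand-in for that lookup, assuming the usual default of "cuda" (simplified sketch, not the actual envs.py code):

    import os

    # Assumed default: envs.VLLM_TARGET_DEVICE falls back to "cuda" when unset.
    VLLM_TARGET_DEVICE = os.environ.get("VLLM_TARGET_DEVICE", "cuda")

    # The hunk above then demotes the old hard assert to a warning and
    # substitutes the synthetic "empty" target on non-Linux platforms.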

@@ -231,6 +234,10 @@ def build_extensions(self) -> None:
         subprocess.check_call(["cmake", *build_args], cwd=self.build_temp)
 
 
+def _no_device() -> bool:
+    return VLLM_TARGET_DEVICE == "empty"
+
+
 def _is_cuda() -> bool:
     has_cuda = torch.version.cuda is not None
     return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
@@ -350,7 +357,9 @@ def find_version(filepath: str) -> str:
 def get_vllm_version() -> str:
     version = find_version(get_path("vllm", "version.py"))
 
-    if _is_cuda():
+    if _no_device():
+        version += "+empty"
+    elif _is_cuda():
         cuda_version = str(get_nvcc_cuda_version())
         if cuda_version != MAIN_CUDA_VERSION:
             cuda_version_str = cuda_version.replace(".", "")[:3]
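"+empty" is a PEP 440 local version label, so the result stays pip-installable while being easy to tell apart from a real CUDA build. A quick illustration with the packaging library (the base version number here is made up):

    from packaging.version import Version

    v = Version("0.5.0+empty")  # hypothetical base version
    print(v.public)             # "0.5.0"
    print(v.local)              # "empty"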
@@ -404,7 +413,9 @@ def _read_requirements(filename: str) -> List[str]:
             resolved_requirements.append(line)
         return resolved_requirements
 
-    if _is_cuda():
+    if _no_device():
+        requirements = _read_requirements("requirements-cuda.txt")
+    elif _is_cuda():
         requirements = _read_requirements("requirements-cuda.txt")
         cuda_major, cuda_minor = torch.version.cuda.split(".")
         modified_requirements = []
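The empty-device branch reuses requirements-cuda.txt rather than introducing a separate requirements file; the environment markers added above are what keep those CUDA-only pins resolvable on non-Linux hosts. A sketch of how one marker-guarded line parses with the packaging library (comment stripped from the line):

    from packaging.requirements import Requirement

    req = Requirement(
        "xformers == 0.0.27.post2; "
        "platform_system == 'Linux' and platform_machine == 'x86_64'")

    # On a non-Linux host the marker evaluates to False and pip skips the pin.
    print(req.name, req.marker.evaluate())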
@@ -453,6 +464,9 @@ def _read_requirements(filename: str) -> List[str]:
     ext_modules = []
     package_data["vllm"].append("*.so")
 
+if _no_device():
+    ext_modules = []
+
 setup(
     name="vllm",
     version=get_vllm_version(),
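Net effect: on a non-Linux machine, pip install -e . now warns instead of aborting, builds no C++/CUDA extensions, and installs a "+empty" package that is sufficient for editing and testing pure-Python code. One way to confirm what got installed (the version shown is illustrative):

    from importlib.metadata import version

    # An empty-device build carries the "+empty" local version label,
    # e.g. "0.5.0+empty".
    print(version("vllm"))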
