Skip to content

Commit d86a64a

Browse files
committed
Add CPU CMake extension.
1 parent b37cdce commit d86a64a

File tree

3 files changed

+108
-4
lines changed

3 files changed

+108
-4
lines changed

CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
project(vllm_extensions LANGUAGES CXX)

# Target device backend for vLLM.
# `option()` only declares BOOL cache entries, so a string device name must be
# declared with `set(... CACHE STRING ...)` instead; STRINGS gives cmake-gui a
# drop-down of the supported values.
# NOTE(review): setup.py defaults VLLM_TARGET_DEVICE to "cuda"; confirm the
# "cpu" default here is intentional.
set(VLLM_TARGET_DEVICE "cpu" CACHE STRING "Target device backend for vLLM")
set_property(CACHE VLLM_TARGET_DEVICE PROPERTY STRINGS cpu cuda rocm)

message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")

include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
#
append_torchlib_if_found(torch_python)

#
# Forward the non-CUDA device extensions to external CMake scripts.
# Anything that is not a GPU (cuda/rocm) target is handled here and
# configuration stops with return(); only "cpu" is currently supported.
#
if (NOT (VLLM_TARGET_DEVICE STREQUAL "cuda" OR VLLM_TARGET_DEVICE STREQUAL "rocm"))
    if (NOT VLLM_TARGET_DEVICE STREQUAL "cpu")
        message(FATAL_ERROR "Unsupported vLLM target device: ${VLLM_TARGET_DEVICE}")
    endif()
    include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake)
    return()
endif()

#
# Set up GPU language and check the torch version and warn if it isn't
# what is expected.

cmake/cpu_extension.cmake

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
#
# Check the compile flags
#
list(APPEND CXX_COMPILE_FLAGS
    "-fopenmp"
    "-DVLLM_CPU_EXTENSION")

# Read CPU feature flags at configure time (Linux-only: relies on /proc/cpuinfo).
execute_process(COMMAND cat /proc/cpuinfo
                RESULT_VARIABLE CPUINFO_RET
                OUTPUT_VARIABLE CPUINFO)

if (NOT CPUINFO_RET EQUAL 0)
    message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
endif()

# find_isa(<cpuinfo-text> <flag-substring> <out-var>)
# Sets <out-var> to ON in the caller's scope when <flag-substring> occurs in
# <cpuinfo-text>, OFF otherwise. Expansions are quoted so whitespace in the
# cpuinfo text cannot corrupt the argument.
function (find_isa CPUINFO TARGET OUT)
    string(FIND "${CPUINFO}" "${TARGET}" ISA_FOUND)
    if(NOT ISA_FOUND EQUAL -1)
        set(${OUT} ON PARENT_SCOPE)
    else()
        set(${OUT} OFF PARENT_SCOPE)
    endif()
endfunction()

find_isa("${CPUINFO}" "avx512f" AVX512_FOUND)

if (AVX512_FOUND)
    list(APPEND CXX_COMPILE_FLAGS
        "-mavx512f"
        "-mavx512vl"
        "-mavx512bw"
        "-mavx512dq")

    # AVX512-BF16 additionally requires GCC >= 12.3; otherwise fall back
    # to plain AVX512 with a warning.
    find_isa("${CPUINFO}" "avx512_bf16" AVX512BF16_FOUND)
    if (AVX512BF16_FOUND AND
        CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
        CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3)
        list(APPEND CXX_COMPILE_FLAGS "-mavx512bf16")
    else()
        message(WARNING "Disable AVX512-BF16 ISA support, requires gcc/g++ >= 12.3")
    endif()
else()
    message(FATAL_ERROR "vLLM CPU backend requires AVX512 ISA support.")
endif()

message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")

# NOTE(review): removed a stray `message(FATAL_ERROR "vLLM CPU backend is
# unavailable")` that followed the STATUS message above — it aborted
# configuration unconditionally, making every target definition below
# unreachable.

#
# Define extension targets
#

#
# _C extension
#
set(VLLM_EXT_SRC
    "csrc/cpu/activation.cpp"
    "csrc/cpu/attention.cpp"
    "csrc/cpu/cache.cpp"
    "csrc/cpu/layernorm.cpp"
    "csrc/cpu/pos_encoding.cpp"
    "csrc/pybind.cpp")

define_gpu_extension_target(
    _C
    DESTINATION vllm
    LANGUAGE CXX
    SOURCES ${VLLM_EXT_SRC}
    COMPILE_FLAGS ${CXX_COMPILE_FLAGS}
    WITH_SOABI
)

add_custom_target(default)
message(STATUS "Enabling C extension.")
add_dependencies(default _C)
setup.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from torch.utils.cpp_extension import CUDA_HOME
1414

1515
ROOT_DIR = os.path.dirname(__file__)
# Target device of vLLM, supporting [cuda (by default), rocm, neuron, cpu]
# (this change adds the cpu backend, so it belongs in the supported list).
VLLM_TARGET_DEVICE = os.getenv("VLLM_TARGET_DEVICE", "cuda")
1618

1719
# vLLM only supports Linux platform
1820
assert sys.platform.startswith(
@@ -61,8 +63,7 @@ def compute_num_jobs(self):
6163
except AttributeError:
6264
num_jobs = os.cpu_count()
6365

64-
nvcc_cuda_version = get_nvcc_cuda_version()
65-
if nvcc_cuda_version >= Version("11.2"):
66+
if _is_cuda() and get_nvcc_cuda_version() >= Version("11.2"):
6667
nvcc_threads = int(os.getenv("NVCC_THREADS", 8))
6768
num_jobs = max(1, round(num_jobs / (nvcc_threads / 4)))
6869
else:
@@ -95,6 +96,7 @@ def configure(self, ext: CMakeExtension) -> None:
9596
'-DCMAKE_BUILD_TYPE={}'.format(cfg),
9697
'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(outdir),
9798
'-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={}'.format(self.build_temp),
99+
'-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE),
98100
]
99101

100102
verbose = bool(int(os.getenv('VERBOSE', '0')))
@@ -168,11 +170,12 @@ def build_extensions(self) -> None:
168170

169171

170172
def _is_cuda() -> bool:
    """Return True when the build targets CUDA and torch reports a CUDA build."""
    if VLLM_TARGET_DEVICE != "cuda":
        return False
    return torch.version.cuda is not None
172174

173175

174176
def _is_hip() -> bool:
    """Return True for a ROCm/HIP torch build when targeting cuda or rocm."""
    targets_gpu = VLLM_TARGET_DEVICE in ("cuda", "rocm")
    return targets_gpu and torch.version.hip is not None
176179

177180

178181
def _is_neuron() -> bool:
@@ -184,6 +187,10 @@ def _is_neuron() -> bool:
184187
return torch_neuronx_installed
185188

186189

190+
def _is_cpu() -> bool:
    """Return True when the selected vLLM target device is the CPU backend."""
    return "cpu" == VLLM_TARGET_DEVICE
192+
193+
187194
def _install_punica() -> bool:
188195
return bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0")))
189196

@@ -279,6 +286,8 @@ def get_vllm_version() -> str:
279286
if neuron_version != MAIN_CUDA_VERSION:
280287
neuron_version_str = neuron_version.replace(".", "")[:3]
281288
version += f"+neuron{neuron_version_str}"
289+
elif _is_cpu():
290+
version += "+cpu"
282291
else:
283292
raise RuntimeError("Unknown runtime environment")
284293

@@ -311,6 +320,8 @@ def get_requirements() -> List[str]:
311320
elif _is_neuron():
312321
with open(get_path("requirements-neuron.txt")) as f:
313322
requirements = f.read().strip().split("\n")
323+
elif _is_cpu():
324+
requirements = []
314325
else:
315326
raise ValueError(
316327
"Unsupported platform, please use CUDA, ROCM or Neuron.")

0 commit comments

Comments
 (0)