4 files changed: +11, -4 lines

.github/workflows/scripts
@@ -13,6 +13,8 @@ $python_executable -m pip install -r requirements.txt
 
 # Limit the number of parallel jobs to avoid OOM
 export MAX_JOBS=1
+# Make sure punica is built for the release (for LoRA)
+export VLLM_INSTALL_PUNICA_KERNELS=1
 
 # Build
 $python_executable setup.py bdist_wheel --dist-dir=dist
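Note: release wheels now opt in to the punica kernels at build time via this env var. A quick way to check whether an installed wheel actually ships them is to look for the compiled extension module; a minimal sketch, assuming the extension is packaged as vllm._punica_C (the module name is an assumption, not shown in this diff) and that vllm is already installed in the environment:

# check_punica.py -- report whether the installed vLLM wheel contains the
# compiled punica LoRA kernels (extension name assumed to be vllm._punica_C).
import importlib.util

spec = importlib.util.find_spec("vllm._punica_C")
if spec is None:
    print("punica kernels NOT found; rebuild with VLLM_INSTALL_PUNICA_KERNELS=1")
else:
    print(f"punica kernels found at {spec.origin}")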
@@ -45,6 +45,8 @@ ENV MAX_JOBS=${max_jobs}
 # number of threads used by nvcc
 ARG nvcc_threads=8
 ENV NVCC_THREADS=$nvcc_threads
+# make sure punica kernels are built (for LoRA)
+ENV VLLM_INSTALL_PUNICA_KERNELS=1
 
 RUN python3 setup.py build_ext --inplace
 # ################### EXTENSION Build IMAGE ####################
@@ -265,7 +265,7 @@ def get_torch_arch_list() -> Set[str]:
     with contextlib.suppress(ValueError):
         torch_cpp_ext.COMMON_NVCC_FLAGS.remove(flag)
 
-install_punica = bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "1")))
+install_punica = bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0")))
 device_count = torch.cuda.device_count()
 for i in range(device_count):
     major, minor = torch.cuda.get_device_capability(i)
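The default flips from "1" to "0", so source builds now skip the punica extension unless VLLM_INSTALL_PUNICA_KERNELS=1 is set explicitly; the release script and Dockerfile above turn it back on. A minimal sketch of the gating pattern around this hunk (simplified; the capability check continuing past the shown context lines and the ext_modules wiring are assumptions, not part of this diff):

# Sketch of the build-time gate (illustration only, not the full setup.py).
import os

import torch

# Opt-in: disabled by default, enabled with VLLM_INSTALL_PUNICA_KERNELS=1.
install_punica = bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0")))

# Punica kernels need compute capability >= 8.0, so skip them if any
# visible GPU is older than that.
for i in range(torch.cuda.device_count()):
    major, _minor = torch.cuda.get_device_capability(i)
    if major < 8:
        install_punica = False
        break

if install_punica:
    print("punica CUDA extension would be added to ext_modules")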
@@ -157,10 +157,13 @@ def _raise_exc(
         **kwargs  # pylint: disable=unused-argument
     ):
         if torch.cuda.get_device_capability() < (8, 0):
-            raise ImportError(
-                "LoRA kernels require compute capability>=8.0") from import_exc
+            raise ImportError("punica LoRA kernels require compute "
+                              "capability>=8.0") from import_exc
         else:
-            raise import_exc
+            raise ImportError(
+                "punica LoRA kernels could not be imported. If you built vLLM "
+                "from source, make sure VLLM_INSTALL_PUNICA_KERNELS=1 env var "
+                "was set.") from import_exc
 
     bgmv = _raise_exc
     add_lora = _raise_exc
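For context, the two raise sites sit inside the fallback that replaces the kernel entry points when the compiled extension cannot be imported. The framing below is a sketch of that pattern, not a verbatim copy of the module; the extension name vllm._punica_C and the success-path handling are assumptions:

# Sketch of the import-fallback pattern surrounding the changed lines.
import torch

try:
    import vllm._punica_C as punica_kernels  # compiled CUDA extension (assumed name)
except ImportError as import_exc:
    # The "as" name is cleared when the except block ends, so keep a reference
    # for the deferred error below.
    _punica_import_exc = import_exc

    def _raise_exc(*args, **kwargs):  # pylint: disable=unused-argument
        if torch.cuda.get_device_capability() < (8, 0):
            raise ImportError("punica LoRA kernels require compute "
                              "capability>=8.0") from _punica_import_exc
        raise ImportError(
            "punica LoRA kernels could not be imported. If you built vLLM "
            "from source, make sure VLLM_INSTALL_PUNICA_KERNELS=1 env var "
            "was set.") from _punica_import_exc

    # Any LoRA kernel call now fails with an actionable message instead of a
    # bare ImportError.
    bgmv = _raise_exc
    add_lora = _raise_exc
else:
    # When the import succeeds, the module defines thin wrappers around
    # punica_kernels here; omitted in this sketch.
    pass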