Skip to content

Commit e7c9c50

Browse files
committed
fix(setup): improve precompiled wheel setup for Docker builds
This refactors the logic for handling `VLLM_USE_PRECOMPILED` to ensure that Docker builds extract only the required .so files and properly modify the package_data before setup():

- Removes an errant precompiled wheel copy that was copying old code, i.e. not code from the current checkout.
- Moves the precompiled wheel extraction logic into a utility class.
- Applies the package_data patch before calling setup().
- Now skips build_ext when precompiled is enabled.
- Supports fallback to the nightly wheel if the wheel for the latest commit isn't available.

Follow-up to PR #21964 as part of improving build times for CI.

Signed-off-by: dougbtv <dosmith@redhat.com>
1 parent 9484641 commit e7c9c50

File tree

3 files changed

+104
-124
lines changed

3 files changed

+104
-124
lines changed

docker/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
370370
fi
371371

372372
# Install vllm wheel first, so that torch etc will be installed.
373+
# !bang
373374
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
374375
--mount=type=cache,target=/root/.cache/uv \
375376
uv pip install --system dist/*.whl --verbose \

requirements/test.txt

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@ aiohttp==3.10.11
2222
aiohttp-cors==0.8.1
2323
# via ray
2424
aiosignal==1.3.1
25-
# via
26-
# aiohttp
27-
# ray
25+
# via aiohttp
2826
albucore==0.0.16
2927
# via terratorch
3028
albumentations==1.4.6
@@ -139,7 +137,7 @@ contourpy==1.3.0
139137
# via matplotlib
140138
cramjam==2.9.0
141139
# via fastparquet
142-
cupy-cuda12x==13.3.0
140+
cupy-cuda12x==13.5.1
143141
# via ray
144142
cycler==0.12.1
145143
# via matplotlib
@@ -226,7 +224,6 @@ frozenlist==1.5.0
226224
# via
227225
# aiohttp
228226
# aiosignal
229-
# ray
230227
fsspec==2024.9.0
231228
# via
232229
# datasets
@@ -603,10 +600,18 @@ opencv-python-headless==4.11.0.86
603600
opentelemetry-api==1.35.0
604601
# via
605602
# mlflow-skinny
603+
# opentelemetry-exporter-prometheus
606604
# opentelemetry-sdk
607605
# opentelemetry-semantic-conventions
606+
opentelemetry-exporter-prometheus==0.56b0
607+
# via ray
608+
opentelemetry-proto==1.36.0
609+
# via ray
608610
opentelemetry-sdk==1.35.0
609-
# via mlflow-skinny
611+
# via
612+
# mlflow-skinny
613+
# opentelemetry-exporter-prometheus
614+
# ray
610615
opentelemetry-semantic-conventions==0.56b0
611616
# via opentelemetry-sdk
612617
packaging==24.2
@@ -697,7 +702,9 @@ pqdm==0.2.0
697702
pretrainedmodels==0.7.4
698703
# via segmentation-models-pytorch
699704
prometheus-client==0.22.0
700-
# via ray
705+
# via
706+
# opentelemetry-exporter-prometheus
707+
# ray
701708
propcache==0.2.0
702709
# via yarl
703710
proto-plus==1.26.1
@@ -707,6 +714,7 @@ protobuf==5.28.3
707714
# google-api-core
708715
# googleapis-common-protos
709716
# mlflow-skinny
717+
# opentelemetry-proto
710718
# proto-plus
711719
# ray
712720
# tensorboardx
@@ -854,7 +862,7 @@ rasterio==1.4.3
854862
# rioxarray
855863
# terratorch
856864
# torchgeo
857-
ray==2.43.0
865+
ray==2.48.0
858866
# via -r requirements/test.in
859867
redis==5.2.0
860868
# via tensorizer

setup.py

Lines changed: 87 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -282,10 +282,69 @@ def run(self):
282282
self.copy_file(file, dst_file)
283283

284284

285-
class repackage_wheel(build_ext):
285+
class precompiled_wheel_utils:
286286
"""Extracts libraries and other files from an existing wheel."""
287287

288-
def get_base_commit_in_main_branch(self) -> str:
288+
@staticmethod
289+
def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
290+
import tempfile
291+
import zipfile
292+
293+
temp_dir = None
294+
try:
295+
if not os.path.isfile(wheel_url_or_path):
296+
wheel_filename = wheel_url_or_path.split("/")[-1]
297+
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
298+
wheel_path = os.path.join(temp_dir, wheel_filename)
299+
print(f"Downloading wheel from {wheel_url_or_path} "
300+
f"to {wheel_path}")
301+
from urllib.request import urlretrieve
302+
urlretrieve(wheel_url_or_path, filename=wheel_path)
303+
else:
304+
wheel_path = wheel_url_or_path
305+
print(f"Using existing wheel at {wheel_path}")
306+
307+
package_data_patch = {}
308+
309+
with zipfile.ZipFile(wheel_path) as wheel:
310+
files_to_copy = [
311+
"vllm/_C.abi3.so",
312+
"vllm/_moe_C.abi3.so",
313+
"vllm/_flashmla_C.abi3.so",
314+
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
315+
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
316+
"vllm/cumem_allocator.abi3.so",
317+
]
318+
319+
compiled_regex = re.compile(
320+
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
321+
file_members = list(
322+
filter(lambda x: x.filename in files_to_copy,
323+
wheel.filelist))
324+
file_members += list(
325+
filter(lambda x: compiled_regex.match(x.filename),
326+
wheel.filelist))
327+
328+
for file in file_members:
329+
print(f"[extract] {file.filename}")
330+
target_path = os.path.join(".", file.filename)
331+
os.makedirs(os.path.dirname(target_path), exist_ok=True)
332+
with wheel.open(file.filename) as src, open(
333+
target_path, "wb") as dst:
334+
shutil.copyfileobj(src, dst)
335+
336+
pkg = os.path.dirname(file.filename).replace("/", ".")
337+
package_data_patch.setdefault(pkg, []).append(
338+
os.path.basename(file.filename))
339+
340+
return package_data_patch
341+
finally:
342+
if temp_dir is not None:
343+
print(f"Removing temporary directory {temp_dir}")
344+
shutil.rmtree(temp_dir)
345+
346+
@staticmethod
347+
def get_base_commit_in_main_branch() -> str:
289348
# Force to use the nightly wheel. This is mainly used for CI testing.
290349
if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
291350
return "nightly"
@@ -334,115 +393,6 @@ def get_base_commit_in_main_branch(self) -> str:
334393
"wheel may not be compatible with your dev branch: %s", err)
335394
return "nightly"
336395

337-
def run(self) -> None:
338-
assert _is_cuda(
339-
), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
340-
341-
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
342-
if wheel_location is None:
343-
base_commit = self.get_base_commit_in_main_branch()
344-
wheel_location = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
345-
# Fallback to nightly wheel if latest commit wheel is unavailable,
346-
# in this rare case, the nightly release CI hasn't finished on main.
347-
if not is_url_available(wheel_location):
348-
wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
349-
350-
import zipfile
351-
352-
if os.path.isfile(wheel_location):
353-
wheel_path = wheel_location
354-
print(f"Using existing wheel={wheel_path}")
355-
else:
356-
# Download the wheel from a given URL, assume
357-
# the filename is the last part of the URL
358-
wheel_filename = wheel_location.split("/")[-1]
359-
360-
import tempfile
361-
362-
# create a temporary directory to store the wheel
363-
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
364-
wheel_path = os.path.join(temp_dir, wheel_filename)
365-
print(f"Downloading wheel from {wheel_location} to {wheel_path}")
366-
from urllib.request import urlretrieve
367-
try:
368-
urlretrieve(wheel_location, filename=wheel_path)
369-
except Exception as e:
370-
from setuptools.errors import SetupError
371-
raise SetupError(
372-
f"Failed to get vLLM wheel from {wheel_location}") from e
373-
374-
# Set the dist_dir for Docker build context
375-
dist_dir = ("/workspace/dist"
376-
if envs.VLLM_DOCKER_BUILD_CONTEXT else "dist")
377-
os.makedirs(dist_dir, exist_ok=True)
378-
379-
# Extract only necessary compiled .so files from precompiled wheel
380-
with zipfile.ZipFile(wheel_path) as wheel:
381-
# Get version from METADATA (optional, mostly useful for logging)
382-
metadata_file = next((n for n in wheel.namelist()
383-
if n.endswith(".dist-info/METADATA")), None)
384-
if not metadata_file:
385-
raise RuntimeError(
386-
"Could not find METADATA in precompiled wheel.")
387-
metadata = wheel.read(metadata_file).decode()
388-
version_line = next((line for line in metadata.splitlines()
389-
if line.startswith("Version: ")), None)
390-
if not version_line:
391-
raise RuntimeError(
392-
"Could not determine version from METADATA.")
393-
version = version_line.split(": ")[1].strip()
394-
395-
print(f"Extracting precompiled kernels from vLLM wheel version: "
396-
f"{version}")
397-
398-
# List of compiled shared objects to extract
399-
files_to_copy = [
400-
"vllm/_C.abi3.so",
401-
"vllm/_moe_C.abi3.so",
402-
"vllm/_flashmla_C.abi3.so",
403-
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
404-
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
405-
"vllm/cumem_allocator.abi3.so",
406-
]
407-
408-
file_members = list(
409-
filter(lambda x: x.filename in files_to_copy, wheel.filelist))
410-
compiled_regex = re.compile(
411-
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
412-
file_members += list(
413-
filter(lambda x: compiled_regex.match(x.filename),
414-
wheel.filelist))
415-
416-
for file in file_members:
417-
print(f"Extracting and including {file.filename} "
418-
"from existing wheel")
419-
package_name = os.path.dirname(file.filename).replace("/", ".")
420-
file_name = os.path.basename(file.filename)
421-
422-
if package_name not in package_data:
423-
package_data[package_name] = []
424-
425-
output_base = (dist_dir
426-
if envs.VLLM_DOCKER_BUILD_CONTEXT else ".")
427-
target_path = os.path.join(output_base, file.filename)
428-
os.makedirs(os.path.dirname(target_path), exist_ok=True)
429-
with wheel.open(file.filename) as src, open(target_path,
430-
"wb") as dst:
431-
shutil.copyfileobj(src, dst)
432-
433-
package_data[package_name].append(file_name)
434-
435-
# Copy wheel into dist dir for Docker to consume (e.g., via --mount)
436-
if envs.VLLM_DOCKER_BUILD_CONTEXT:
437-
arch_tag = "cp38-abi3-manylinux1_x86_64"
438-
corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl"
439-
final_wheel_path = os.path.join(dist_dir, corrected_wheel_name)
440-
441-
print(
442-
"Docker build context detected, copying precompiled wheel to "
443-
f"{final_wheel_path}")
444-
shutil.copy2(wheel_path, final_wheel_path)
445-
446396

447397
def _no_device() -> bool:
448398
return VLLM_TARGET_DEVICE == "empty"
@@ -676,16 +626,37 @@ def _read_requirements(filename: str) -> list[str]:
676626
]
677627
}
678628

629+
# If using precompiled, extract and patch package_data (in advance of setup)
630+
if envs.VLLM_USE_PRECOMPILED:
631+
assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
632+
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
633+
if wheel_location is not None:
634+
wheel_url = wheel_location
635+
else:
636+
base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
637+
wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
638+
from urllib.request import urlopen
639+
try:
640+
with urlopen(wheel_url) as resp:
641+
if resp.status != 200:
642+
wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
643+
except Exception as e:
644+
print(f"[warn] Falling back to nightly wheel: {e}")
645+
wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
646+
647+
patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
648+
wheel_url)
649+
for pkg, files in patch.items():
650+
package_data.setdefault(pkg, []).extend(files)
651+
679652
if _no_device():
680653
ext_modules = []
681654

682-
if not ext_modules:
655+
if not ext_modules or envs.VLLM_USE_PRECOMPILED:
656+
# Disable build_ext when using precompiled wheel
683657
cmdclass = {}
684658
else:
685-
cmdclass = {
686-
"build_ext":
687-
repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
688-
}
659+
cmdclass = {"build_ext": cmake_build_ext}
689660

690661
setup(
691662
# static metadata should rather go in pyproject.toml

0 commit comments

Comments
 (0)