Commit 886897d

Revert "fix(setup): improve precompiled wheel setup for Docker builds (#22025)"
This reverts commit 58bb902.

Signed-off-by: Kebe <mail@kebe7jun.com>

Parent: 3700642

3 files changed: +124 additions, -104 deletions

docker/Dockerfile

Lines changed: 0 additions & 1 deletion
@@ -369,7 +369,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     fi
 
 # Install vllm wheel first, so that torch etc will be installed.
-# !bang
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system dist/*.whl --verbose \

requirements/test.txt

Lines changed: 8 additions & 16 deletions
@@ -22,7 +22,9 @@ aiohttp==3.10.11
 aiohttp-cors==0.8.1
     # via ray
 aiosignal==1.3.1
-    # via aiohttp
+    # via
+    #   aiohttp
+    #   ray
 albucore==0.0.16
     # via terratorch
 albumentations==1.4.6
@@ -137,7 +139,7 @@ contourpy==1.3.0
     # via matplotlib
 cramjam==2.9.0
     # via fastparquet
-cupy-cuda12x==13.5.1
+cupy-cuda12x==13.3.0
     # via ray
 cycler==0.12.1
     # via matplotlib
@@ -224,6 +226,7 @@ frozenlist==1.5.0
     # via
     #   aiohttp
     #   aiosignal
+    #   ray
 fsspec==2024.9.0
     # via
     #   datasets
@@ -600,18 +603,10 @@ opencv-python-headless==4.11.0.86
 opentelemetry-api==1.35.0
     # via
     #   mlflow-skinny
-    #   opentelemetry-exporter-prometheus
     #   opentelemetry-sdk
     #   opentelemetry-semantic-conventions
-opentelemetry-exporter-prometheus==0.56b0
-    # via ray
-opentelemetry-proto==1.36.0
-    # via ray
 opentelemetry-sdk==1.35.0
-    # via
-    #   mlflow-skinny
-    #   opentelemetry-exporter-prometheus
-    #   ray
+    # via mlflow-skinny
 opentelemetry-semantic-conventions==0.56b0
     # via opentelemetry-sdk
 packaging==24.2
@@ -702,9 +697,7 @@ pqdm==0.2.0
 pretrainedmodels==0.7.4
     # via segmentation-models-pytorch
 prometheus-client==0.22.0
-    # via
-    #   opentelemetry-exporter-prometheus
-    #   ray
+    # via ray
 propcache==0.2.0
     # via yarl
 proto-plus==1.26.1
@@ -714,7 +707,6 @@ protobuf==5.28.3
     #   google-api-core
     #   googleapis-common-protos
     #   mlflow-skinny
-    #   opentelemetry-proto
     #   proto-plus
     #   ray
     #   tensorboardx
@@ -862,7 +854,7 @@ rasterio==1.4.3
     #   rioxarray
     #   terratorch
     #   torchgeo
-ray==2.48.0
+ray==2.43.0
     # via -r requirements/test.in
 redis==5.2.0
     # via tensorizer

setup.py

Lines changed: 116 additions & 87 deletions
@@ -282,69 +282,10 @@ def run(self):
             self.copy_file(file, dst_file)
 
 
-class precompiled_wheel_utils:
+class repackage_wheel(build_ext):
     """Extracts libraries and other files from an existing wheel."""
 
-    @staticmethod
-    def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
-        import tempfile
-        import zipfile
-
-        temp_dir = None
-        try:
-            if not os.path.isfile(wheel_url_or_path):
-                wheel_filename = wheel_url_or_path.split("/")[-1]
-                temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
-                wheel_path = os.path.join(temp_dir, wheel_filename)
-                print(f"Downloading wheel from {wheel_url_or_path} "
-                      f"to {wheel_path}")
-                from urllib.request import urlretrieve
-                urlretrieve(wheel_url_or_path, filename=wheel_path)
-            else:
-                wheel_path = wheel_url_or_path
-                print(f"Using existing wheel at {wheel_path}")
-
-            package_data_patch = {}
-
-            with zipfile.ZipFile(wheel_path) as wheel:
-                files_to_copy = [
-                    "vllm/_C.abi3.so",
-                    "vllm/_moe_C.abi3.so",
-                    "vllm/_flashmla_C.abi3.so",
-                    "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
-                    "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
-                    "vllm/cumem_allocator.abi3.so",
-                ]
-
-                compiled_regex = re.compile(
-                    r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
-                file_members = list(
-                    filter(lambda x: x.filename in files_to_copy,
-                           wheel.filelist))
-                file_members += list(
-                    filter(lambda x: compiled_regex.match(x.filename),
-                           wheel.filelist))
-
-                for file in file_members:
-                    print(f"[extract] {file.filename}")
-                    target_path = os.path.join(".", file.filename)
-                    os.makedirs(os.path.dirname(target_path), exist_ok=True)
-                    with wheel.open(file.filename) as src, open(
-                            target_path, "wb") as dst:
-                        shutil.copyfileobj(src, dst)
-
-                    pkg = os.path.dirname(file.filename).replace("/", ".")
-                    package_data_patch.setdefault(pkg, []).append(
-                        os.path.basename(file.filename))
-
-            return package_data_patch
-        finally:
-            if temp_dir is not None:
-                print(f"Removing temporary directory {temp_dir}")
-                shutil.rmtree(temp_dir)
-
-    @staticmethod
-    def get_base_commit_in_main_branch() -> str:
+    def get_base_commit_in_main_branch(self) -> str:
         # Force to use the nightly wheel. This is mainly used for CI testing.
         if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
             return "nightly"
@@ -393,6 +334,115 @@ def get_base_commit_in_main_branch() -> str:
                 "wheel may not be compatible with your dev branch: %s", err)
             return "nightly"
 
+    def run(self) -> None:
+        assert _is_cuda(
+        ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
+
+        wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
+        if wheel_location is None:
+            base_commit = self.get_base_commit_in_main_branch()
+            wheel_location = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+            # Fallback to nightly wheel if latest commit wheel is unavailable,
+            # in this rare case, the nightly release CI hasn't finished on main.
+            if not is_url_available(wheel_location):
+                wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+
+        import zipfile
+
+        if os.path.isfile(wheel_location):
+            wheel_path = wheel_location
+            print(f"Using existing wheel={wheel_path}")
+        else:
+            # Download the wheel from a given URL, assume
+            # the filename is the last part of the URL
+            wheel_filename = wheel_location.split("/")[-1]
+
+            import tempfile
+
+            # create a temporary directory to store the wheel
+            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
+            wheel_path = os.path.join(temp_dir, wheel_filename)
+            print(f"Downloading wheel from {wheel_location} to {wheel_path}")
+            from urllib.request import urlretrieve
+            try:
+                urlretrieve(wheel_location, filename=wheel_path)
+            except Exception as e:
+                from setuptools.errors import SetupError
+                raise SetupError(
+                    f"Failed to get vLLM wheel from {wheel_location}") from e
+
+        # Set the dist_dir for Docker build context
+        dist_dir = ("/workspace/dist"
+                    if envs.VLLM_DOCKER_BUILD_CONTEXT else "dist")
+        os.makedirs(dist_dir, exist_ok=True)
+
+        # Extract only necessary compiled .so files from precompiled wheel
+        with zipfile.ZipFile(wheel_path) as wheel:
+            # Get version from METADATA (optional, mostly useful for logging)
+            metadata_file = next((n for n in wheel.namelist()
+                                  if n.endswith(".dist-info/METADATA")), None)
+            if not metadata_file:
+                raise RuntimeError(
+                    "Could not find METADATA in precompiled wheel.")
+            metadata = wheel.read(metadata_file).decode()
+            version_line = next((line for line in metadata.splitlines()
+                                 if line.startswith("Version: ")), None)
+            if not version_line:
+                raise RuntimeError(
+                    "Could not determine version from METADATA.")
+            version = version_line.split(": ")[1].strip()
+
+            print(f"Extracting precompiled kernels from vLLM wheel version: "
+                  f"{version}")
+
+            # List of compiled shared objects to extract
+            files_to_copy = [
+                "vllm/_C.abi3.so",
+                "vllm/_moe_C.abi3.so",
+                "vllm/_flashmla_C.abi3.so",
+                "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
+                "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
+                "vllm/cumem_allocator.abi3.so",
+            ]
+
+            file_members = list(
+                filter(lambda x: x.filename in files_to_copy, wheel.filelist))
+            compiled_regex = re.compile(
+                r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
+            file_members += list(
+                filter(lambda x: compiled_regex.match(x.filename),
+                       wheel.filelist))
+
+            for file in file_members:
+                print(f"Extracting and including {file.filename} "
+                      "from existing wheel")
+                package_name = os.path.dirname(file.filename).replace("/", ".")
+                file_name = os.path.basename(file.filename)
+
+                if package_name not in package_data:
+                    package_data[package_name] = []
+
+                output_base = (dist_dir
+                               if envs.VLLM_DOCKER_BUILD_CONTEXT else ".")
+                target_path = os.path.join(output_base, file.filename)
+                os.makedirs(os.path.dirname(target_path), exist_ok=True)
+                with wheel.open(file.filename) as src, open(target_path,
+                                                            "wb") as dst:
+                    shutil.copyfileobj(src, dst)
+
+                package_data[package_name].append(file_name)
+
+        # Copy wheel into dist dir for Docker to consume (e.g., via --mount)
+        if envs.VLLM_DOCKER_BUILD_CONTEXT:
+            arch_tag = "cp38-abi3-manylinux1_x86_64"
+            corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl"
+            final_wheel_path = os.path.join(dist_dir, corrected_wheel_name)
+
+            print(
+                "Docker build context detected, copying precompiled wheel to "
+                f"{final_wheel_path}")
+            shutil.copy2(wheel_path, final_wheel_path)
+
 
 def _no_device() -> bool:
     return VLLM_TARGET_DEVICE == "empty"
@@ -626,37 +676,16 @@ def _read_requirements(filename: str) -> list[str]:
     ]
 }
 
-# If using precompiled, extract and patch package_data (in advance of setup)
-if envs.VLLM_USE_PRECOMPILED:
-    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
-    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
-    if wheel_location is not None:
-        wheel_url = wheel_location
-    else:
-        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
-        wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-        from urllib.request import urlopen
-        try:
-            with urlopen(wheel_url) as resp:
-                if resp.status != 200:
-                    wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-        except Exception as e:
-            print(f"[warn] Falling back to nightly wheel: {e}")
-            wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
-
-    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
-        wheel_url)
-    for pkg, files in patch.items():
-        package_data.setdefault(pkg, []).extend(files)
-
 if _no_device():
     ext_modules = []
 
-if not ext_modules or envs.VLLM_USE_PRECOMPILED:
-    # Disable build_ext when using precompiled wheel
+if not ext_modules:
     cmdclass = {}
 else:
-    cmdclass = {"build_ext": cmake_build_ext}
+    cmdclass = {
+        "build_ext":
+        repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
+    }
 
 setup(
     # static metadata should rather go in pyproject.toml
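For reference, the reverted helper (precompiled_wheel_utils.extract_precompiled_and_patch_package) and the restored repackage_wheel.run() share the same core step: open the precompiled wheel as a zip archive, copy the selected .abi3.so members out into a target tree, and record each file under its dotted package name so package_data ships it. Below is a minimal standalone sketch of that step, assuming a locally downloaded wheel and a trimmed file list; the path and list are placeholders for illustration, not vLLM code.

# Standalone sketch: extract compiled extension modules from an existing wheel.
# WHEEL_PATH and SO_FILES are assumed placeholders, not values from this commit.
import os
import shutil
import zipfile

WHEEL_PATH = "dist/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"  # placeholder
SO_FILES = {
    "vllm/_C.abi3.so",
    "vllm/cumem_allocator.abi3.so",
}

package_data: dict[str, list[str]] = {}

with zipfile.ZipFile(WHEEL_PATH) as wheel:
    for member in wheel.filelist:
        if member.filename not in SO_FILES:
            continue
        target = os.path.join(".", member.filename)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        # Stream the archive member into the source tree, keeping its layout,
        # so setuptools can later pick the .so up via package_data.
        with wheel.open(member.filename) as src, open(target, "wb") as dst:
            shutil.copyfileobj(src, dst)
        pkg = os.path.dirname(member.filename).replace("/", ".")
        package_data.setdefault(pkg, []).append(os.path.basename(member.filename))
        print(f"extracted {member.filename}")

The two implementations differ mainly in where this logic runs (a module-level patch before setup() in the reverted version versus a build_ext subclass selected through cmdclass in the restored one) and in the restored version's extra handling for the Docker build context.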
