@@ -282,10 +282,69 @@ def run(self):
282282 self .copy_file (file , dst_file )
283283
284284
285- class repackage_wheel ( build_ext ) :
285+ class precompiled_wheel_utils :
286286 """Extracts libraries and other files from an existing wheel."""
287287
288- def get_base_commit_in_main_branch (self ) -> str :
@staticmethod
def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
    """Extract precompiled artifacts from a vLLM wheel into the source tree.

    The wheel is read from ``wheel_url_or_path`` when that names an existing
    local file; otherwise the value is treated as a URL and the wheel is
    downloaded into a temporary directory that is removed before returning.

    Only an allow-list of compiled ``.so`` files plus the ``.py`` files under
    ``vllm/vllm_flash_attn/`` are extracted. They are written relative to the
    current working directory, preserving their in-wheel paths.

    Args:
        wheel_url_or_path: Local wheel path, or URL to download the wheel
            from (the last ``/``-separated component is used as file name).

    Returns:
        A mapping from dotted package name (e.g. ``"vllm.vllm_flash_attn"``)
        to the list of extracted file base names belonging to that package,
        suitable for merging into setuptools ``package_data``.
    """
    import tempfile
    import zipfile

    temp_dir = None
    try:
        if os.path.isfile(wheel_url_or_path):
            wheel_path = wheel_url_or_path
            print(f"Using existing wheel at {wheel_path}")
        else:
            wheel_filename = wheel_url_or_path.split("/")[-1]
            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
            wheel_path = os.path.join(temp_dir, wheel_filename)
            print(f"Downloading wheel from {wheel_url_or_path} "
                  f"to {wheel_path}")
            from urllib.request import urlretrieve
            urlretrieve(wheel_url_or_path, filename=wheel_path)

        package_data_patch = {}

        # Compiled shared objects that are taken verbatim from the wheel.
        files_to_copy = {
            "vllm/_C.abi3.so",
            "vllm/_moe_C.abi3.so",
            "vllm/_flashmla_C.abi3.so",
            "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
            "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
            "vllm/cumem_allocator.abi3.so",
        }
        # Python sources of vllm_flash_attn, excluding hidden files/dirs.
        compiled_regex = re.compile(
            r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")

        with zipfile.ZipFile(wheel_path) as wheel:
            file_members = [
                member for member in wheel.filelist
                if member.filename in files_to_copy
            ]
            # fullmatch (not match): the whole member name must be a .py
            # path. A prefix match would also accept "x.pyc" or names such
            # as "a.py/../../evil" that escape the extraction directory.
            file_members += [
                member for member in wheel.filelist
                if compiled_regex.fullmatch(member.filename)
            ]

            for file in file_members:
                print(f"[extract] {file.filename}")
                target_path = os.path.join(".", file.filename)
                os.makedirs(os.path.dirname(target_path), exist_ok=True)
                with wheel.open(file.filename) as src, open(
                        target_path, "wb") as dst:
                    shutil.copyfileobj(src, dst)

                pkg = os.path.dirname(file.filename).replace("/", ".")
                package_data_patch.setdefault(pkg, []).append(
                    os.path.basename(file.filename))

        return package_data_patch
    finally:
        # Only set when the wheel was downloaded; never remove a user's file.
        if temp_dir is not None:
            print(f"Removing temporary directory {temp_dir}")
            shutil.rmtree(temp_dir)
345+
346+ @staticmethod
347+ def get_base_commit_in_main_branch () -> str :
289348 # Force to use the nightly wheel. This is mainly used for CI testing.
290349 if envs .VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL :
291350 return "nightly"
@@ -334,115 +393,6 @@ def get_base_commit_in_main_branch(self) -> str:
334393 "wheel may not be compatible with your dev branch: %s" , err )
335394 return "nightly"
336395
337- def run (self ) -> None :
338- assert _is_cuda (
339- ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
340-
341- wheel_location = os .getenv ("VLLM_PRECOMPILED_WHEEL_LOCATION" , None )
342- if wheel_location is None :
343- base_commit = self .get_base_commit_in_main_branch ()
344- wheel_location = f"https://wheels.vllm.ai/{ base_commit } /vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
345- # Fallback to nightly wheel if latest commit wheel is unavailable,
346- # in this rare case, the nightly release CI hasn't finished on main.
347- if not is_url_available (wheel_location ):
348- wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
349-
350- import zipfile
351-
352- if os .path .isfile (wheel_location ):
353- wheel_path = wheel_location
354- print (f"Using existing wheel={ wheel_path } " )
355- else :
356- # Download the wheel from a given URL, assume
357- # the filename is the last part of the URL
358- wheel_filename = wheel_location .split ("/" )[- 1 ]
359-
360- import tempfile
361-
362- # create a temporary directory to store the wheel
363- temp_dir = tempfile .mkdtemp (prefix = "vllm-wheels" )
364- wheel_path = os .path .join (temp_dir , wheel_filename )
365- print (f"Downloading wheel from { wheel_location } to { wheel_path } " )
366- from urllib .request import urlretrieve
367- try :
368- urlretrieve (wheel_location , filename = wheel_path )
369- except Exception as e :
370- from setuptools .errors import SetupError
371- raise SetupError (
372- f"Failed to get vLLM wheel from { wheel_location } " ) from e
373-
374- # Set the dist_dir for Docker build context
375- dist_dir = ("/workspace/dist"
376- if envs .VLLM_DOCKER_BUILD_CONTEXT else "dist" )
377- os .makedirs (dist_dir , exist_ok = True )
378-
379- # Extract only necessary compiled .so files from precompiled wheel
380- with zipfile .ZipFile (wheel_path ) as wheel :
381- # Get version from METADATA (optional, mostly useful for logging)
382- metadata_file = next ((n for n in wheel .namelist ()
383- if n .endswith (".dist-info/METADATA" )), None )
384- if not metadata_file :
385- raise RuntimeError (
386- "Could not find METADATA in precompiled wheel." )
387- metadata = wheel .read (metadata_file ).decode ()
388- version_line = next ((line for line in metadata .splitlines ()
389- if line .startswith ("Version: " )), None )
390- if not version_line :
391- raise RuntimeError (
392- "Could not determine version from METADATA." )
393- version = version_line .split (": " )[1 ].strip ()
394-
395- print (f"Extracting precompiled kernels from vLLM wheel version: "
396- f"{ version } " )
397-
398- # List of compiled shared objects to extract
399- files_to_copy = [
400- "vllm/_C.abi3.so" ,
401- "vllm/_moe_C.abi3.so" ,
402- "vllm/_flashmla_C.abi3.so" ,
403- "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so" ,
404- "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so" ,
405- "vllm/cumem_allocator.abi3.so" ,
406- ]
407-
408- file_members = list (
409- filter (lambda x : x .filename in files_to_copy , wheel .filelist ))
410- compiled_regex = re .compile (
411- r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py" )
412- file_members += list (
413- filter (lambda x : compiled_regex .match (x .filename ),
414- wheel .filelist ))
415-
416- for file in file_members :
417- print (f"Extracting and including { file .filename } "
418- "from existing wheel" )
419- package_name = os .path .dirname (file .filename ).replace ("/" , "." )
420- file_name = os .path .basename (file .filename )
421-
422- if package_name not in package_data :
423- package_data [package_name ] = []
424-
425- output_base = (dist_dir
426- if envs .VLLM_DOCKER_BUILD_CONTEXT else "." )
427- target_path = os .path .join (output_base , file .filename )
428- os .makedirs (os .path .dirname (target_path ), exist_ok = True )
429- with wheel .open (file .filename ) as src , open (target_path ,
430- "wb" ) as dst :
431- shutil .copyfileobj (src , dst )
432-
433- package_data [package_name ].append (file_name )
434-
435- # Copy wheel into dist dir for Docker to consume (e.g., via --mount)
436- if envs .VLLM_DOCKER_BUILD_CONTEXT :
437- arch_tag = "cp38-abi3-manylinux1_x86_64"
438- corrected_wheel_name = f"vllm-{ version } -{ arch_tag } .whl"
439- final_wheel_path = os .path .join (dist_dir , corrected_wheel_name )
440-
441- print (
442- "Docker build context detected, copying precompiled wheel to "
443- f"{ final_wheel_path } " )
444- shutil .copy2 (wheel_path , final_wheel_path )
445-
446396
447397def _no_device () -> bool :
448398 return VLLM_TARGET_DEVICE == "empty"
@@ -676,16 +626,37 @@ def _read_requirements(filename: str) -> list[str]:
676626 ]
677627}
678628
# With VLLM_USE_PRECOMPILED set, the binaries are taken from an existing wheel
# and registered in package_data before setup() runs.
if envs.VLLM_USE_PRECOMPILED:
    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
    wheel_url = wheel_location
    if wheel_url is None:
        # No explicit location given: try the wheel built for the base commit
        # and use the nightly wheel when that one cannot be opened.
        nightly_wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
        wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
        from urllib.request import urlopen
        try:
            with urlopen(wheel_url) as resp:
                commit_wheel_ok = resp.status == 200
        except Exception as e:
            print(f"[warn] Falling back to nightly wheel: {e}")
            commit_wheel_ok = False
        if not commit_wheel_ok:
            wheel_url = nightly_wheel_url

    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
        wheel_url)
    # Merge the extracted file names into the existing package_data entries.
    for pkg_name, extracted_files in patch.items():
        package_data.setdefault(pkg_name, []).extend(extracted_files)

651+
if _no_device():
    ext_modules = []

# build_ext is registered only when there are extensions to compile and the
# binaries are not coming from a precompiled wheel.
cmdclass = ({} if (not ext_modules or envs.VLLM_USE_PRECOMPILED) else {
    "build_ext": cmake_build_ext
})
689660
690661setup (
691662 # static metadata should rather go in pyproject.toml
0 commit comments