@@ -282,69 +282,10 @@ def run(self):
282282            self .copy_file (file , dst_file )
283283
284284
285- class  precompiled_wheel_utils :
285+ class  repackage_wheel ( build_ext ) :
286286    """Extracts libraries and other files from an existing wheel.""" 
287287
288-     @staticmethod  
289-     def  extract_precompiled_and_patch_package (wheel_url_or_path : str ) ->  dict :
290-         import  tempfile 
291-         import  zipfile 
292- 
293-         temp_dir  =  None 
294-         try :
295-             if  not  os .path .isfile (wheel_url_or_path ):
296-                 wheel_filename  =  wheel_url_or_path .split ("/" )[- 1 ]
297-                 temp_dir  =  tempfile .mkdtemp (prefix = "vllm-wheels" )
298-                 wheel_path  =  os .path .join (temp_dir , wheel_filename )
299-                 print (f"Downloading wheel from { wheel_url_or_path }  
300-                       f"to { wheel_path }  )
301-                 from  urllib .request  import  urlretrieve 
302-                 urlretrieve (wheel_url_or_path , filename = wheel_path )
303-             else :
304-                 wheel_path  =  wheel_url_or_path 
305-                 print (f"Using existing wheel at { wheel_path }  )
306- 
307-             package_data_patch  =  {}
308- 
309-             with  zipfile .ZipFile (wheel_path ) as  wheel :
310-                 files_to_copy  =  [
311-                     "vllm/_C.abi3.so" ,
312-                     "vllm/_moe_C.abi3.so" ,
313-                     "vllm/_flashmla_C.abi3.so" ,
314-                     "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so" ,
315-                     "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so" ,
316-                     "vllm/cumem_allocator.abi3.so" ,
317-                 ]
318- 
319-                 compiled_regex  =  re .compile (
320-                     r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py" )
321-                 file_members  =  list (
322-                     filter (lambda  x : x .filename  in  files_to_copy ,
323-                            wheel .filelist ))
324-                 file_members  +=  list (
325-                     filter (lambda  x : compiled_regex .match (x .filename ),
326-                            wheel .filelist ))
327- 
328-                 for  file  in  file_members :
329-                     print (f"[extract] { file .filename }  )
330-                     target_path  =  os .path .join ("." , file .filename )
331-                     os .makedirs (os .path .dirname (target_path ), exist_ok = True )
332-                     with  wheel .open (file .filename ) as  src , open (
333-                             target_path , "wb" ) as  dst :
334-                         shutil .copyfileobj (src , dst )
335- 
336-                     pkg  =  os .path .dirname (file .filename ).replace ("/" , "." )
337-                     package_data_patch .setdefault (pkg , []).append (
338-                         os .path .basename (file .filename ))
339- 
340-             return  package_data_patch 
341-         finally :
342-             if  temp_dir  is  not None :
343-                 print (f"Removing temporary directory { temp_dir }  )
344-                 shutil .rmtree (temp_dir )
345- 
346-     @staticmethod  
347-     def  get_base_commit_in_main_branch () ->  str :
288+     def  get_base_commit_in_main_branch (self ) ->  str :
348289        # Force to use the nightly wheel. This is mainly used for CI testing. 
349290        if  envs .VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL :
350291            return  "nightly" 
@@ -393,6 +334,115 @@ def get_base_commit_in_main_branch() -> str:
393334                "wheel may not be compatible with your dev branch: %s" , err )
394335            return  "nightly" 
395336
337+     def  run (self ) ->  None :
338+         assert  _is_cuda (
339+         ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds" 
340+ 
341+         wheel_location  =  os .getenv ("VLLM_PRECOMPILED_WHEEL_LOCATION" , None )
342+         if  wheel_location  is  None :
343+             base_commit  =  self .get_base_commit_in_main_branch ()
344+             wheel_location  =  f"https://wheels.vllm.ai/{ base_commit }  
345+             # Fallback to nightly wheel if latest commit wheel is unavailable, 
346+             # in this rare case, the nightly release CI hasn't finished on main. 
347+             if  not  is_url_available (wheel_location ):
348+                 wheel_location  =  "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl" 
349+ 
350+         import  zipfile 
351+ 
352+         if  os .path .isfile (wheel_location ):
353+             wheel_path  =  wheel_location 
354+             print (f"Using existing wheel={ wheel_path }  )
355+         else :
356+             # Download the wheel from a given URL, assume 
357+             # the filename is the last part of the URL 
358+             wheel_filename  =  wheel_location .split ("/" )[- 1 ]
359+ 
360+             import  tempfile 
361+ 
362+             # create a temporary directory to store the wheel 
363+             temp_dir  =  tempfile .mkdtemp (prefix = "vllm-wheels" )
364+             wheel_path  =  os .path .join (temp_dir , wheel_filename )
365+             print (f"Downloading wheel from { wheel_location } { wheel_path }  )
366+             from  urllib .request  import  urlretrieve 
367+             try :
368+                 urlretrieve (wheel_location , filename = wheel_path )
369+             except  Exception  as  e :
370+                 from  setuptools .errors  import  SetupError 
371+                 raise  SetupError (
372+                     f"Failed to get vLLM wheel from { wheel_location }  ) from  e 
373+ 
374+         # Set the dist_dir for Docker build context 
375+         dist_dir  =  ("/workspace/dist" 
376+                     if  envs .VLLM_DOCKER_BUILD_CONTEXT  else  "dist" )
377+         os .makedirs (dist_dir , exist_ok = True )
378+ 
379+         # Extract only necessary compiled .so files from precompiled wheel 
380+         with  zipfile .ZipFile (wheel_path ) as  wheel :
381+             # Get version from METADATA (optional, mostly useful for logging) 
382+             metadata_file  =  next ((n  for  n  in  wheel .namelist ()
383+                                   if  n .endswith (".dist-info/METADATA" )), None )
384+             if  not  metadata_file :
385+                 raise  RuntimeError (
386+                     "Could not find METADATA in precompiled wheel." )
387+             metadata  =  wheel .read (metadata_file ).decode ()
388+             version_line  =  next ((line  for  line  in  metadata .splitlines ()
389+                                  if  line .startswith ("Version: " )), None )
390+             if  not  version_line :
391+                 raise  RuntimeError (
392+                     "Could not determine version from METADATA." )
393+             version  =  version_line .split (": " )[1 ].strip ()
394+ 
395+             print (f"Extracting precompiled kernels from vLLM wheel version: " 
396+                   f"{ version }  )
397+ 
398+             # List of compiled shared objects to extract 
399+             files_to_copy  =  [
400+                 "vllm/_C.abi3.so" ,
401+                 "vllm/_moe_C.abi3.so" ,
402+                 "vllm/_flashmla_C.abi3.so" ,
403+                 "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so" ,
404+                 "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so" ,
405+                 "vllm/cumem_allocator.abi3.so" ,
406+             ]
407+ 
408+             file_members  =  list (
409+                 filter (lambda  x : x .filename  in  files_to_copy , wheel .filelist ))
410+             compiled_regex  =  re .compile (
411+                 r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py" )
412+             file_members  +=  list (
413+                 filter (lambda  x : compiled_regex .match (x .filename ),
414+                        wheel .filelist ))
415+ 
416+             for  file  in  file_members :
417+                 print (f"Extracting and including { file .filename }  
418+                       "from existing wheel" )
419+                 package_name  =  os .path .dirname (file .filename ).replace ("/" , "." )
420+                 file_name  =  os .path .basename (file .filename )
421+ 
422+                 if  package_name  not  in package_data :
423+                     package_data [package_name ] =  []
424+ 
425+                 output_base  =  (dist_dir 
426+                                if  envs .VLLM_DOCKER_BUILD_CONTEXT  else  "." )
427+                 target_path  =  os .path .join (output_base , file .filename )
428+                 os .makedirs (os .path .dirname (target_path ), exist_ok = True )
429+                 with  wheel .open (file .filename ) as  src , open (target_path ,
430+                                                             "wb" ) as  dst :
431+                     shutil .copyfileobj (src , dst )
432+ 
433+                 package_data [package_name ].append (file_name )
434+ 
435+         # Copy wheel into dist dir for Docker to consume (e.g., via --mount) 
436+         if  envs .VLLM_DOCKER_BUILD_CONTEXT :
437+             arch_tag  =  "cp38-abi3-manylinux1_x86_64" 
438+             corrected_wheel_name  =  f"vllm-{ version } { arch_tag }  
439+             final_wheel_path  =  os .path .join (dist_dir , corrected_wheel_name )
440+ 
441+             print (
442+                 "Docker build context detected, copying precompiled wheel to " 
443+                 f"{ final_wheel_path }  )
444+             shutil .copy2 (wheel_path , final_wheel_path )
445+ 
396446
397447def  _no_device () ->  bool :
398448    return  VLLM_TARGET_DEVICE  ==  "empty" 
@@ -626,37 +676,16 @@ def _read_requirements(filename: str) -> list[str]:
626676    ]
627677}
628678
629- # If using precompiled, extract and patch package_data (in advance of setup) 
630- if  envs .VLLM_USE_PRECOMPILED :
631-     assert  _is_cuda (), "VLLM_USE_PRECOMPILED is only supported for CUDA builds" 
632-     wheel_location  =  os .getenv ("VLLM_PRECOMPILED_WHEEL_LOCATION" , None )
633-     if  wheel_location  is  not None :
634-         wheel_url  =  wheel_location 
635-     else :
636-         base_commit  =  precompiled_wheel_utils .get_base_commit_in_main_branch ()
637-         wheel_url  =  f"https://wheels.vllm.ai/{ base_commit }  
638-         from  urllib .request  import  urlopen 
639-         try :
640-             with  urlopen (wheel_url ) as  resp :
641-                 if  resp .status  !=  200 :
642-                     wheel_url  =  "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl" 
643-         except  Exception  as  e :
644-             print (f"[warn] Falling back to nightly wheel: { e }  )
645-             wheel_url  =  "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl" 
646- 
647-     patch  =  precompiled_wheel_utils .extract_precompiled_and_patch_package (
648-         wheel_url )
649-     for  pkg , files  in  patch .items ():
650-         package_data .setdefault (pkg , []).extend (files )
651- 
652679if  _no_device ():
653680    ext_modules  =  []
654681
655- if  not  ext_modules  or  envs .VLLM_USE_PRECOMPILED :
656-     # Disable build_ext when using precompiled wheel 
682+ if  not  ext_modules :
657683    cmdclass  =  {}
658684else :
659-     cmdclass  =  {"build_ext" : cmake_build_ext }
685+     cmdclass  =  {
686+         "build_ext" :
687+         repackage_wheel  if  envs .VLLM_USE_PRECOMPILED  else  cmake_build_ext 
688+     }
660689
661690setup (
662691    # static metadata should rather go in pyproject.toml 
0 commit comments