@@ -282,10 +282,69 @@ def run(self):
282282            self .copy_file (file , dst_file )
283283
284284
285- class  repackage_wheel ( build_ext ) :
285+ class  precompiled_wheel_utils :
286286    """Extracts libraries and other files from an existing wheel.""" 
287287
288-     def  get_base_commit_in_main_branch (self ) ->  str :
288+     @staticmethod  
289+     def  extract_precompiled_and_patch_package (wheel_url_or_path : str ) ->  dict :
290+         import  tempfile 
291+         import  zipfile 
292+ 
293+         temp_dir  =  None 
294+         try :
295+             if  not  os .path .isfile (wheel_url_or_path ):
296+                 wheel_filename  =  wheel_url_or_path .split ("/" )[- 1 ]
297+                 temp_dir  =  tempfile .mkdtemp (prefix = "vllm-wheels" )
298+                 wheel_path  =  os .path .join (temp_dir , wheel_filename )
299+                 print (f"Downloading wheel from { wheel_url_or_path }  
300+                       f"to { wheel_path }  )
301+                 from  urllib .request  import  urlretrieve 
302+                 urlretrieve (wheel_url_or_path , filename = wheel_path )
303+             else :
304+                 wheel_path  =  wheel_url_or_path 
305+                 print (f"Using existing wheel at { wheel_path }  )
306+ 
307+             package_data_patch  =  {}
308+ 
309+             with  zipfile .ZipFile (wheel_path ) as  wheel :
310+                 files_to_copy  =  [
311+                     "vllm/_C.abi3.so" ,
312+                     "vllm/_moe_C.abi3.so" ,
313+                     "vllm/_flashmla_C.abi3.so" ,
314+                     "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so" ,
315+                     "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so" ,
316+                     "vllm/cumem_allocator.abi3.so" ,
317+                 ]
318+ 
319+                 compiled_regex  =  re .compile (
320+                     r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py" )
321+                 file_members  =  list (
322+                     filter (lambda  x : x .filename  in  files_to_copy ,
323+                            wheel .filelist ))
324+                 file_members  +=  list (
325+                     filter (lambda  x : compiled_regex .match (x .filename ),
326+                            wheel .filelist ))
327+ 
328+                 for  file  in  file_members :
329+                     print (f"[extract] { file .filename }  )
330+                     target_path  =  os .path .join ("." , file .filename )
331+                     os .makedirs (os .path .dirname (target_path ), exist_ok = True )
332+                     with  wheel .open (file .filename ) as  src , open (
333+                             target_path , "wb" ) as  dst :
334+                         shutil .copyfileobj (src , dst )
335+ 
336+                     pkg  =  os .path .dirname (file .filename ).replace ("/" , "." )
337+                     package_data_patch .setdefault (pkg , []).append (
338+                         os .path .basename (file .filename ))
339+ 
340+             return  package_data_patch 
341+         finally :
342+             if  temp_dir  is  not None :
343+                 print (f"Removing temporary directory { temp_dir }  )
344+                 shutil .rmtree (temp_dir )
345+ 
346+     @staticmethod  
347+     def  get_base_commit_in_main_branch () ->  str :
289348        # Force to use the nightly wheel. This is mainly used for CI testing. 
290349        if  envs .VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL :
291350            return  "nightly" 
@@ -334,115 +393,6 @@ def get_base_commit_in_main_branch(self) -> str:
334393                "wheel may not be compatible with your dev branch: %s" , err )
335394            return  "nightly" 
336395
337-     def  run (self ) ->  None :
338-         assert  _is_cuda (
339-         ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds" 
340- 
341-         wheel_location  =  os .getenv ("VLLM_PRECOMPILED_WHEEL_LOCATION" , None )
342-         if  wheel_location  is  None :
343-             base_commit  =  self .get_base_commit_in_main_branch ()
344-             wheel_location  =  f"https://wheels.vllm.ai/{ base_commit }  
345-             # Fallback to nightly wheel if latest commit wheel is unavailable, 
346-             # in this rare case, the nightly release CI hasn't finished on main. 
347-             if  not  is_url_available (wheel_location ):
348-                 wheel_location  =  "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl" 
349- 
350-         import  zipfile 
351- 
352-         if  os .path .isfile (wheel_location ):
353-             wheel_path  =  wheel_location 
354-             print (f"Using existing wheel={ wheel_path }  )
355-         else :
356-             # Download the wheel from a given URL, assume 
357-             # the filename is the last part of the URL 
358-             wheel_filename  =  wheel_location .split ("/" )[- 1 ]
359- 
360-             import  tempfile 
361- 
362-             # create a temporary directory to store the wheel 
363-             temp_dir  =  tempfile .mkdtemp (prefix = "vllm-wheels" )
364-             wheel_path  =  os .path .join (temp_dir , wheel_filename )
365-             print (f"Downloading wheel from { wheel_location } { wheel_path }  )
366-             from  urllib .request  import  urlretrieve 
367-             try :
368-                 urlretrieve (wheel_location , filename = wheel_path )
369-             except  Exception  as  e :
370-                 from  setuptools .errors  import  SetupError 
371-                 raise  SetupError (
372-                     f"Failed to get vLLM wheel from { wheel_location }  ) from  e 
373- 
374-         # Set the dist_dir for Docker build context 
375-         dist_dir  =  ("/workspace/dist" 
376-                     if  envs .VLLM_DOCKER_BUILD_CONTEXT  else  "dist" )
377-         os .makedirs (dist_dir , exist_ok = True )
378- 
379-         # Extract only necessary compiled .so files from precompiled wheel 
380-         with  zipfile .ZipFile (wheel_path ) as  wheel :
381-             # Get version from METADATA (optional, mostly useful for logging) 
382-             metadata_file  =  next ((n  for  n  in  wheel .namelist ()
383-                                   if  n .endswith (".dist-info/METADATA" )), None )
384-             if  not  metadata_file :
385-                 raise  RuntimeError (
386-                     "Could not find METADATA in precompiled wheel." )
387-             metadata  =  wheel .read (metadata_file ).decode ()
388-             version_line  =  next ((line  for  line  in  metadata .splitlines ()
389-                                  if  line .startswith ("Version: " )), None )
390-             if  not  version_line :
391-                 raise  RuntimeError (
392-                     "Could not determine version from METADATA." )
393-             version  =  version_line .split (": " )[1 ].strip ()
394- 
395-             print (f"Extracting precompiled kernels from vLLM wheel version: " 
396-                   f"{ version }  )
397- 
398-             # List of compiled shared objects to extract 
399-             files_to_copy  =  [
400-                 "vllm/_C.abi3.so" ,
401-                 "vllm/_moe_C.abi3.so" ,
402-                 "vllm/_flashmla_C.abi3.so" ,
403-                 "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so" ,
404-                 "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so" ,
405-                 "vllm/cumem_allocator.abi3.so" ,
406-             ]
407- 
408-             file_members  =  list (
409-                 filter (lambda  x : x .filename  in  files_to_copy , wheel .filelist ))
410-             compiled_regex  =  re .compile (
411-                 r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py" )
412-             file_members  +=  list (
413-                 filter (lambda  x : compiled_regex .match (x .filename ),
414-                        wheel .filelist ))
415- 
416-             for  file  in  file_members :
417-                 print (f"Extracting and including { file .filename }  
418-                       "from existing wheel" )
419-                 package_name  =  os .path .dirname (file .filename ).replace ("/" , "." )
420-                 file_name  =  os .path .basename (file .filename )
421- 
422-                 if  package_name  not  in package_data :
423-                     package_data [package_name ] =  []
424- 
425-                 output_base  =  (dist_dir 
426-                                if  envs .VLLM_DOCKER_BUILD_CONTEXT  else  "." )
427-                 target_path  =  os .path .join (output_base , file .filename )
428-                 os .makedirs (os .path .dirname (target_path ), exist_ok = True )
429-                 with  wheel .open (file .filename ) as  src , open (target_path ,
430-                                                             "wb" ) as  dst :
431-                     shutil .copyfileobj (src , dst )
432- 
433-                 package_data [package_name ].append (file_name )
434- 
435-         # Copy wheel into dist dir for Docker to consume (e.g., via --mount) 
436-         if  envs .VLLM_DOCKER_BUILD_CONTEXT :
437-             arch_tag  =  "cp38-abi3-manylinux1_x86_64" 
438-             corrected_wheel_name  =  f"vllm-{ version } { arch_tag }  
439-             final_wheel_path  =  os .path .join (dist_dir , corrected_wheel_name )
440- 
441-             print (
442-                 "Docker build context detected, copying precompiled wheel to " 
443-                 f"{ final_wheel_path }  )
444-             shutil .copy2 (wheel_path , final_wheel_path )
445- 
446396
def _no_device() -> bool:
    """Report whether ``VLLM_TARGET_DEVICE`` is set to ``"empty"``."""
    target = VLLM_TARGET_DEVICE
    return target == "empty"
@@ -676,16 +626,37 @@ def _read_requirements(filename: str) -> list[str]:
676626    ]
677627}
678628
# If using precompiled, extract and patch package_data (in advance of setup)
if envs.VLLM_USE_PRECOMPILED:
    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
    if wheel_location is not None:
        # An explicit location (local path or URL) always wins.
        wheel_url = wheel_location
    else:
        from urllib.request import urlopen

        wheel_filename = "vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
        nightly_wheel_url = f"https://wheels.vllm.ai/nightly/{wheel_filename}"

        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
        wheel_url = f"https://wheels.vllm.ai/{base_commit}/{wheel_filename}"
        try:
            # Probe the per-commit wheel; the timeout keeps an unresponsive
            # server from hanging the build indefinitely.
            with urlopen(wheel_url, timeout=30) as resp:
                if resp.status != 200:
                    print("[warn] Falling back to nightly wheel: "
                          f"unexpected HTTP status {resp.status}")
                    wheel_url = nightly_wheel_url
        except Exception as e:
            # Deliberately broad: any failure to reach the per-commit wheel
            # is treated as "not available" and the nightly wheel is used.
            print(f"[warn] Falling back to nightly wheel: {e}")
            wheel_url = nightly_wheel_url

    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
        wheel_url)
    # Merge the extracted file names into package_data so they are packaged.
    for pkg, files in patch.items():
        package_data.setdefault(pkg, []).extend(files)

651+ 
# No extension modules are built when _no_device() is true.
if _no_device():
    ext_modules = []

# Register the CMake-driven build_ext only when there are extensions to build
# and no precompiled wheel is in use; otherwise leave cmdclass empty.
cmdclass = ({"build_ext": cmake_build_ext}
            if ext_modules and not envs.VLLM_USE_PRECOMPILED else {})
689660
690661setup (
691662    # static metadata should rather go in pyproject.toml 
0 commit comments