77import  logging 
88import  os 
99import  re 
10+ import  shutil 
1011import  subprocess 
1112import  sys 
1213from  pathlib  import  Path 
@@ -281,10 +282,81 @@ def run(self):
281282            self .copy_file (file , dst_file )
282283
283284
284- class  repackage_wheel (build_ext ):
class precompiled_build_ext(build_ext):
    """build_ext stand-in used when extensions come from precompiled binaries.

    Neither hook compiles anything: ``run`` only checks the target platform
    and ``build_extensions`` only reports that the build step is skipped.
    """

    def run(self) -> None:
        """Check that this is a CUDA build; perform no build work."""
        cuda_build = _is_cuda()
        assert cuda_build, (
            "VLLM_USE_PRECOMPILED is only supported for CUDA builds")

    def build_extensions(self) -> None:
        """Report that extension compilation is skipped."""
        print("Skipping build_ext: using precompiled extensions.")
295+ 
296+ 
297+ class  precompiled_wheel_utils :
285298    """Extracts libraries and other files from an existing wheel.""" 
286299
287-     def  get_base_commit_in_main_branch (self ) ->  str :
300+     @staticmethod  
301+     def  extract_precompiled_and_patch_package (wheel_url_or_path : str ) ->  dict :
302+         import  tempfile 
303+         import  zipfile 
304+ 
305+         temp_dir  =  None 
306+         try :
307+             if  not  os .path .isfile (wheel_url_or_path ):
308+                 wheel_filename  =  wheel_url_or_path .split ("/" )[- 1 ]
309+                 temp_dir  =  tempfile .mkdtemp (prefix = "vllm-wheels" )
310+                 wheel_path  =  os .path .join (temp_dir , wheel_filename )
311+                 print (f"Downloading wheel from { wheel_url_or_path }  
312+                       f"to { wheel_path }  )
313+                 from  urllib .request  import  urlretrieve 
314+                 urlretrieve (wheel_url_or_path , filename = wheel_path )
315+             else :
316+                 wheel_path  =  wheel_url_or_path 
317+                 print (f"Using existing wheel at { wheel_path }  )
318+ 
319+             package_data_patch  =  {}
320+ 
321+             with  zipfile .ZipFile (wheel_path ) as  wheel :
322+                 files_to_copy  =  [
323+                     "vllm/_C.abi3.so" ,
324+                     "vllm/_moe_C.abi3.so" ,
325+                     "vllm/_flashmla_C.abi3.so" ,
326+                     "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so" ,
327+                     "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so" ,
328+                     "vllm/cumem_allocator.abi3.so" ,
329+                 ]
330+ 
331+                 compiled_regex  =  re .compile (
332+                     r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py" )
333+                 file_members  =  list (
334+                     filter (lambda  x : x .filename  in  files_to_copy ,
335+                            wheel .filelist ))
336+                 file_members  +=  list (
337+                     filter (lambda  x : compiled_regex .match (x .filename ),
338+                            wheel .filelist ))
339+ 
340+                 for  file  in  file_members :
341+                     print (f"[extract] { file .filename }  )
342+                     target_path  =  os .path .join ("." , file .filename )
343+                     os .makedirs (os .path .dirname (target_path ), exist_ok = True )
344+                     with  wheel .open (file .filename ) as  src , open (
345+                             target_path , "wb" ) as  dst :
346+                         shutil .copyfileobj (src , dst )
347+ 
348+                     pkg  =  os .path .dirname (file .filename ).replace ("/" , "." )
349+                     package_data_patch .setdefault (pkg , []).append (
350+                         os .path .basename (file .filename ))
351+ 
352+             return  package_data_patch 
353+         finally :
354+             if  temp_dir  is  not None :
355+                 print (f"Removing temporary directory { temp_dir }  )
356+                 shutil .rmtree (temp_dir )
357+ 
358+     @staticmethod  
359+     def  get_base_commit_in_main_branch () ->  str :
288360        # Force to use the nightly wheel. This is mainly used for CI testing. 
289361        if  envs .VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL :
290362            return  "nightly" 
@@ -297,6 +369,10 @@ def get_base_commit_in_main_branch(self) -> str:
297369            ]).decode ("utf-8" )
298370            upstream_main_commit  =  json .loads (resp_json )["sha" ]
299371
372+             # In Docker build context, .git may be immutable or missing. 
373+             if  envs .VLLM_DOCKER_BUILD_CONTEXT :
374+                 return  upstream_main_commit 
375+ 
300376            # Check if the upstream_main_commit exists in the local repo 
301377            try :
302378                subprocess .check_output (
@@ -329,86 +405,6 @@ def get_base_commit_in_main_branch(self) -> str:
329405                "wheel may not be compatible with your dev branch: %s" , err )
330406            return  "nightly" 
331407
332-     def  run (self ) ->  None :
333-         assert  _is_cuda (
334-         ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds" 
335- 
336-         wheel_location  =  os .getenv ("VLLM_PRECOMPILED_WHEEL_LOCATION" , None )
337-         if  wheel_location  is  None :
338-             base_commit  =  self .get_base_commit_in_main_branch ()
339-             wheel_location  =  f"https://wheels.vllm.ai/{ base_commit }  
340-             # Fallback to nightly wheel if latest commit wheel is unavailable, 
341-             # in this rare case, the nightly release CI hasn't finished on main. 
342-             if  not  is_url_available (wheel_location ):
343-                 wheel_location  =  "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl" 
344- 
345-         import  zipfile 
346- 
347-         if  os .path .isfile (wheel_location ):
348-             wheel_path  =  wheel_location 
349-             print (f"Using existing wheel={ wheel_path }  )
350-         else :
351-             # Download the wheel from a given URL, assume 
352-             # the filename is the last part of the URL 
353-             wheel_filename  =  wheel_location .split ("/" )[- 1 ]
354- 
355-             import  tempfile 
356- 
357-             # create a temporary directory to store the wheel 
358-             temp_dir  =  tempfile .mkdtemp (prefix = "vllm-wheels" )
359-             wheel_path  =  os .path .join (temp_dir , wheel_filename )
360- 
361-             print (f"Downloading wheel from { wheel_location } { wheel_path }  )
362- 
363-             from  urllib .request  import  urlretrieve 
364- 
365-             try :
366-                 urlretrieve (wheel_location , filename = wheel_path )
367-             except  Exception  as  e :
368-                 from  setuptools .errors  import  SetupError 
369- 
370-                 raise  SetupError (
371-                     f"Failed to get vLLM wheel from { wheel_location }  ) from  e 
372- 
373-         with  zipfile .ZipFile (wheel_path ) as  wheel :
374-             files_to_copy  =  [
375-                 "vllm/_C.abi3.so" ,
376-                 "vllm/_moe_C.abi3.so" ,
377-                 "vllm/_flashmla_C.abi3.so" ,
378-                 "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so" ,
379-                 "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so" ,
380-                 "vllm/cumem_allocator.abi3.so" ,
381-                 # "vllm/_version.py", # not available in nightly wheels yet 
382-             ]
383- 
384-             file_members  =  list (
385-                 filter (lambda  x : x .filename  in  files_to_copy , wheel .filelist ))
386- 
387-             # vllm_flash_attn python code: 
388-             # Regex from 
389-             #  `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)` 
390-             compiled_regex  =  re .compile (
391-                 r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py" )
392-             file_members  +=  list (
393-                 filter (lambda  x : compiled_regex .match (x .filename ),
394-                        wheel .filelist ))
395- 
396-             for  file  in  file_members :
397-                 print (f"Extracting and including { file .filename }  
398-                       "from existing wheel" )
399-                 package_name  =  os .path .dirname (file .filename ).replace ("/" , "." )
400-                 file_name  =  os .path .basename (file .filename )
401- 
402-                 if  package_name  not  in package_data :
403-                     package_data [package_name ] =  []
404- 
405-                 wheel .extract (file )
406-                 if  file_name .endswith (".py" ):
407-                     # python files shouldn't be added to package_data 
408-                     continue 
409- 
410-                 package_data [package_name ].append (file_name )
411- 
412408
413409def  _no_device () ->  bool :
414410    return  VLLM_TARGET_DEVICE  ==  "empty" 
@@ -639,6 +635,29 @@ def _read_requirements(filename: str) -> list[str]:
639635    ]
640636}
641637
# If using precompiled, extract and patch package_data (in advance of setup)
if envs.VLLM_USE_PRECOMPILED:
    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
    if wheel_location is not None:
        # An explicitly configured location is used as given, without probing.
        wheel_url = wheel_location
    else:
        # The per-commit and nightly wheels share one file name and differ
        # only in the directory component of the URL.
        wheel_filename = "vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
        nightly_wheel_url = f"https://wheels.vllm.ai/nightly/{wheel_filename}"

        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
        wheel_url = f"https://wheels.vllm.ai/{base_commit}/{wheel_filename}"

        # Fall back to the nightly wheel if the wheel for the base commit is
        # unavailable (non-200 response, or any error while opening the URL).
        from urllib.request import urlopen
        try:
            with urlopen(wheel_url) as resp:
                if resp.status != 200:
                    wheel_url = nightly_wheel_url
        except Exception as e:
            print(f"[warn] Falling back to nightly wheel: {e}")
            wheel_url = nightly_wheel_url

    # Extract the precompiled files and merge the per-package file lists
    # into package_data.
    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
        wheel_url)
    for pkg, files in patch.items():
        package_data.setdefault(pkg, []).extend(files)
660+ 
642661if  _no_device ():
643662    ext_modules  =  []
644663
@@ -647,7 +666,7 @@ def _read_requirements(filename: str) -> list[str]:
647666else :
648667    cmdclass  =  {
649668        "build_ext" :
650-         repackage_wheel  if  envs .VLLM_USE_PRECOMPILED  else  cmake_build_ext 
669+         precompiled_build_ext  if  envs .VLLM_USE_PRECOMPILED  else  cmake_build_ext 
651670    }
652671
653672setup (
0 commit comments