
Update to torch==2.6.0 #12721


Merged
22 commits merged on Mar 14, 2025
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -46,8 +46,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101")
# requirements.txt files and should be kept consistent. The ROCm torch
# versions are derived from Dockerfile.rocm
#
-set(TORCH_SUPPORTED_VERSION_CUDA "2.5.1")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.5.1")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.6.0")

#
# Try to find python package with an executable that exactly matches
2 changes: 1 addition & 1 deletion Dockerfile
@@ -222,7 +222,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist

RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post1/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl ; \
+uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
fi
COPY examples examples

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -6,7 +6,7 @@ requires = [
"packaging",
"setuptools>=61",
"setuptools-scm>=8.0",
"torch == 2.5.1",
"torch == 2.6.0",
"wheel",
"jinja2",
]
2 changes: 1 addition & 1 deletion requirements/build.txt
@@ -4,6 +4,6 @@ ninja
packaging
setuptools>=61
setuptools-scm>=8
-torch==2.5.1
+torch==2.6.0
wheel
jinja2
10 changes: 5 additions & 5 deletions requirements/cuda.txt
@@ -4,9 +4,9 @@
numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding

# Dependencies for NVIDIA GPUs
-ray[cgraph] >= 2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch == 2.5.1
-torchaudio==2.5.1
+ray[cgraph]>=2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
+torch==2.6.0
+torchaudio==2.6.0
# These must be updated alongside torch
-torchvision == 0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-xformers == 0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.1
+torchvision==0.21.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+xformers==0.0.29.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.6.0
7 changes: 4 additions & 3 deletions requirements/test.in
@@ -21,16 +21,17 @@ sentence-transformers # required for embedding tests
soundfile # required for audio tests
jiwer # required for audio tests
timm # required for internvl test
-torch==2.5.1
-torchaudio==2.5.1
+torch==2.6.0
+torchaudio==2.6.0
+torchvision==0.21.0
transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test
mistral_common[opencv] >= 1.5.0 # required for pixtral test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]==0.4.4 # required for model evaluation test
transformers==4.48.2
# quantization
-bitsandbytes>=0.45.0
+bitsandbytes>=0.45.3
buildkite-test-collector==0.1.9

genai_perf==0.0.8
18 changes: 10 additions & 8 deletions requirements/test.txt
@@ -33,7 +33,7 @@ audioread==3.0.1
# via librosa
awscli==1.35.23
# via -r requirements/test.in
-bitsandbytes==0.45.0
+bitsandbytes==0.45.3
# via -r requirements/test.in
black==24.10.0
# via datamodel-code-generator
@@ -127,7 +127,6 @@ filelock==3.16.1
# ray
# torch
# transformers
-# triton
fonttools==4.54.1
# via matplotlib
frozendict==2.4.6
@@ -320,6 +319,8 @@ nvidia-cusparse-cu12==12.3.1.170
# via
# nvidia-cusolver-cu12
# torch
+nvidia-cusparselt-cu12==0.6.2
+# via torch
nvidia-nccl-cu12==2.21.5
# via torch
nvidia-nvjitlink-cu12==12.4.127
@@ -591,7 +592,7 @@ timm==1.0.11
# via -r requirements/test.in
tokenizers==0.21.0
# via transformers
-torch==2.5.1
+torch==2.6.0
# via
# -r requirements/test.in
# accelerate
@@ -607,13 +608,15 @@ torch==2.5.1
# torchvision
# vector-quantize-pytorch
# vocos
-torchaudio==2.5.1
+torchaudio==2.6.0
# via
# -r requirements/test.in
# encodec
# vocos
-torchvision==0.20.1
-# via timm
+torchvision==0.21.0
+# via
+# -r requirements/test.in
+# timm
tqdm==4.66.6
# via
# datasets
@@ -638,7 +641,7 @@ transformers==4.48.2
# transformers-stream-generator
transformers-stream-generator==0.0.5
# via -r requirements/test.in
-triton==3.1.0
+triton==3.2.0
# via torch
tritonclient==2.51.0
# via
@@ -651,7 +654,6 @@ typepy==1.3.2
# tabledata
typing-extensions==4.12.2
# via
-# bitsandbytes
# huggingface-hub
# librosa
# mistral-common
6 changes: 4 additions & 2 deletions tests/compile/backend.py
@@ -6,6 +6,7 @@
from torch import fx

from vllm.compilation.inductor_pass import InductorPass
+from vllm.config import get_current_vllm_config


class TestBackend:
@@ -17,13 +18,14 @@ class TestBackend:
Inductor config can be modified directly by editing the inductor_config
property. This can be helpful for adding passes like the
'pre_grad_custom_pass' and the 'post_grad_custom_pre_pass'.
+Inductor config is default-initialized from VllmConfig.CompilationConfig.
"""

def __init__(self, *passes: Union[InductorPass, Callable[[fx.Graph],
None]]):
self.custom_passes = list(passes)
-from torch._inductor import config
-self.inductor_config = config.shallow_copy_dict()
+compile_config = get_current_vllm_config().compilation_config
+self.inductor_config = compile_config.inductor_compile_config
self.inductor_config['force_disable_caches'] = True
self.inductor_config['post_grad_custom_post_pass'] = self.post_pass

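
With this change, TestBackend seeds its Inductor overrides from the active VllmConfig instead of a fresh copy of torch._inductor.config. A hedged usage sketch, not part of the PR: it assumes TestBackend is passed as a torch.compile backend, as vLLM's compile tests do, and that a suitable VllmConfig is current so get_current_vllm_config() returns the intended CompilationConfig.

```python
# Hedged sketch of how a compile test might drive TestBackend after this change.
import torch
from torch import fx

from tests.compile.backend import TestBackend  # path as in this PR


def dummy_pass(graph: fx.Graph) -> None:
    # Stand-in custom pass; a real test would rewrite the FX graph here.
    pass


backend = TestBackend(dummy_pass)
# Per-test Inductor options are now layered on top of
# VllmConfig.CompilationConfig.inductor_compile_config:
backend.inductor_config["max_autotune"] = False

compiled = torch.compile(torch.nn.Linear(8, 8), backend=backend)
compiled(torch.randn(2, 8))
```
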
15 changes: 15 additions & 0 deletions vllm/config.py
@@ -52,6 +52,8 @@
else:
QuantizationConfig = None

+from packaging.version import Version

logger = init_logger(__name__)

# This value is chosen to have a balance between ITL and TTFT. Note it is
@@ -3126,6 +3128,19 @@ def model_post_init(self, __context: Any) -> None:
count_all = self.custom_ops.count("all")
assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"

+# TODO(zou3519/luka): There are 2 issues with auto-functionalization V2:
+# 1. A bug in PyTorch, fixed in 2.7:
+#    https://github.com/pytorch/pytorch/issues/147924
+# 2. Custom passes (fusion) rely on auto-functionalization V1 and don't
+#    work with V2. Addressing this will take extra engineering effort
+#    and it is not yet a priority. RFC here:
+#    https://github.com/vllm-project/vllm/issues/14703
+
+if Version(torch.__version__) >= Version("2.6"):
+    KEY = 'enable_auto_functionalized_v2'
+    if KEY not in self.inductor_compile_config:
+        self.inductor_compile_config[KEY] = False

if self.splitting_ops is None:
if envs.VLLM_USE_V1:
# v1 must split the graph on attention ops
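
The guard above only installs a default: on torch >= 2.6, auto-functionalization V2 is turned off unless the user has already set the key themselves. A minimal sketch of the equivalent logic against a plain dict (a stand-in for CompilationConfig.inductor_compile_config, not vLLM code):

```python
# Hedged sketch of the version-gated default added above, using a plain dict.
import torch
from packaging.version import Version

inductor_compile_config: dict = {}  # stand-in for CompilationConfig.inductor_compile_config

if Version(torch.__version__) >= Version("2.6"):
    # Keep auto-functionalization V1 unless the user explicitly opted into V2.
    inductor_compile_config.setdefault("enable_auto_functionalized_v2", False)

print(inductor_compile_config)
```
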