
Update to torch==2.6.0 #12721


Merged
22 commits merged on Mar 14, 2025
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -46,8 +46,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101")
# requirements.txt files and should be kept consistent. The ROCm torch
# versions are derived from Dockerfile.rocm
#
-set(TORCH_SUPPORTED_VERSION_CUDA "2.5.1")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.5.1")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.6.0")

#
# Try to find python package with an executable that exactly matches
2 changes: 1 addition & 1 deletion Dockerfile
@@ -222,7 +222,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist

RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post1/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl ; \
+uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
fi
COPY examples examples

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -6,7 +6,7 @@ requires = [
"packaging",
"setuptools>=61",
"setuptools-scm>=8.0",
"torch == 2.5.1",
"torch == 2.6.0",
"wheel",
"jinja2",
]
2 changes: 1 addition & 1 deletion requirements/build.txt
@@ -4,6 +4,6 @@ ninja
packaging
setuptools>=61
setuptools-scm>=8
-torch==2.5.1
+torch==2.6.0
wheel
jinja2
10 changes: 5 additions & 5 deletions requirements/cuda.txt
@@ -4,9 +4,9 @@
numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding

# Dependencies for NVIDIA GPUs
-ray[cgraph] >= 2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch == 2.5.1
-torchaudio==2.5.1
+ray[cgraph]>=2.43.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
+torch==2.6.0
+torchaudio==2.6.0
# These must be updated alongside torch
-torchvision == 0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-xformers == 0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.1
+torchvision==0.21.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+xformers==0.0.29.post2; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.6.0
7 changes: 4 additions & 3 deletions requirements/test.in
@@ -21,16 +21,17 @@ sentence-transformers # required for embedding tests
soundfile # required for audio tests
jiwer # required for audio tests
timm # required for internvl test
-torch==2.5.1
-torchaudio==2.5.1
+torch==2.6.0
+torchaudio==2.6.0
+torchvision==0.21.0
transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test
mistral_common[opencv] >= 1.5.0 # required for pixtral test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]==0.4.4 # required for model evaluation test
transformers==4.48.2
# quantization
-bitsandbytes>=0.45.0
+bitsandbytes>=0.45.3
buildkite-test-collector==0.1.9

genai_perf==0.0.8
18 changes: 10 additions & 8 deletions requirements/test.txt
@@ -33,7 +33,7 @@ audioread==3.0.1
# via librosa
awscli==1.35.23
# via -r requirements/test.in
-bitsandbytes==0.45.0
+bitsandbytes==0.45.3
# via -r requirements/test.in
black==24.10.0
# via datamodel-code-generator
@@ -127,7 +127,6 @@ filelock==3.16.1
# ray
# torch
# transformers
-# triton
fonttools==4.54.1
# via matplotlib
frozendict==2.4.6
@@ -320,6 +319,8 @@ nvidia-cusparse-cu12==12.3.1.170
# via
# nvidia-cusolver-cu12
# torch
+nvidia-cusparselt-cu12==0.6.2
+# via torch
nvidia-nccl-cu12==2.21.5
# via torch
nvidia-nvjitlink-cu12==12.4.127
@@ -591,7 +592,7 @@ timm==1.0.11
# via -r requirements/test.in
tokenizers==0.21.0
# via transformers
-torch==2.5.1
+torch==2.6.0
# via
# -r requirements/test.in
# accelerate
@@ -607,13 +608,15 @@ torch==2.5.1
# torchvision
# vector-quantize-pytorch
# vocos
-torchaudio==2.5.1
+torchaudio==2.6.0
# via
# -r requirements/test.in
# encodec
# vocos
-torchvision==0.20.1
-# via timm
+torchvision==0.21.0
+# via
+# -r requirements/test.in
+# timm
tqdm==4.66.6
# via
# datasets
@@ -638,7 +641,7 @@ transformers==4.48.2
# transformers-stream-generator
transformers-stream-generator==0.0.5
# via -r requirements/test.in
-triton==3.1.0
+triton==3.2.0
# via torch
tritonclient==2.51.0
# via
@@ -651,7 +654,6 @@ typepy==1.3.2
# tabledata
typing-extensions==4.12.2
# via
-# bitsandbytes
# huggingface-hub
# librosa
# mistral-common
6 changes: 4 additions & 2 deletions tests/compile/backend.py
@@ -6,6 +6,7 @@
from torch import fx

from vllm.compilation.inductor_pass import InductorPass
+from vllm.config import get_current_vllm_config


class TestBackend:
@@ -17,13 +18,14 @@ class TestBackend:
Inductor config can be modified directly by editing the inductor_config
property. This can be helpful for adding passes like the
'pre_grad_custom_pass' and the 'post_grad_custom_pre_pass'.
+Inductor config is default-initialized from VllmConfig.CompilationConfig.
"""

def __init__(self, *passes: Union[InductorPass, Callable[[fx.Graph],
None]]):
self.custom_passes = list(passes)
-from torch._inductor import config
-self.inductor_config = config.shallow_copy_dict()
+compile_config = get_current_vllm_config().compilation_config
+self.inductor_config = compile_config.inductor_compile_config
self.inductor_config['force_disable_caches'] = True
self.inductor_config['post_grad_custom_post_pass'] = self.post_pass

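
With this change, TestBackend seeds its Inductor overrides from the active VllmConfig instead of a fresh copy of torch._inductor.config. A hedged usage sketch, not part of the PR: it assumes TestBackend is passed as a torch.compile backend, as vLLM's compile tests do, and that a suitable VllmConfig is current so get_current_vllm_config() returns the intended CompilationConfig.

```python
# Hedged sketch of how a compile test might drive TestBackend after this change.
import torch
from torch import fx

from tests.compile.backend import TestBackend  # path as in this PR


def dummy_pass(graph: fx.Graph) -> None:
    # Stand-in custom pass; a real test would rewrite the FX graph here.
    pass


backend = TestBackend(dummy_pass)
# Per-test Inductor options are now layered on top of
# VllmConfig.CompilationConfig.inductor_compile_config:
backend.inductor_config["max_autotune"] = False

compiled = torch.compile(torch.nn.Linear(8, 8), backend=backend)
compiled(torch.randn(2, 8))
```
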
15 changes: 15 additions & 0 deletions vllm/config.py
@@ -52,6 +52,8 @@
else:
QuantizationConfig = None

+from packaging.version import Version

logger = init_logger(__name__)

# This value is chosen to have a balance between ITL and TTFT. Note it is
@@ -3126,6 +3128,19 @@ def model_post_init(self, __context: Any) -> None:
count_all = self.custom_ops.count("all")
assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"

+# TODO(zou3519/luka): There are 2 issues with auto-functionalization V2:
+# 1. A bug in PyTorch, fixed in 2.7:
+#    https://github.com/pytorch/pytorch/issues/147924
+# 2. Custom passes (fusion) rely on auto-functionalization V1 and don't
+#    work with V2. Addressing this will take extra engineering effort
+#    and it is not yet a priority. RFC here:
+#    https://github.com/vllm-project/vllm/issues/14703
+
+if Version(torch.__version__) >= Version("2.6"):
+    KEY = 'enable_auto_functionalized_v2'
+    if KEY not in self.inductor_compile_config:
+        self.inductor_compile_config[KEY] = False

if self.splitting_ops is None:
if envs.VLLM_USE_V1:
# v1 must split the graph on attention ops
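
The guard above only installs a default: on torch >= 2.6, auto-functionalization V2 is turned off unless the user has already set the key themselves. A minimal sketch of the equivalent logic against a plain dict (a stand-in for CompilationConfig.inductor_compile_config, not vLLM code):

```python
# Hedged sketch of the version-gated default added above, using a plain dict.
import torch
from packaging.version import Version

inductor_compile_config: dict = {}  # stand-in for CompilationConfig.inductor_compile_config

if Version(torch.__version__) >= Version("2.6"):
    # Keep auto-functionalization V1 unless the user explicitly opted into V2.
    inductor_compile_config.setdefault("enable_auto_functionalized_v2", False)

print(inductor_compile_config)
```
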