
Commit c6704bb

bnellnm authored and youkaichao committed
[Misc] Upgrade to pytorch 2.5 (vllm-project#9588)
Signed-off-by: Bill Nell <bill@neuralmagic.com>
Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
1 parent fefbd73 commit c6704bb

File tree

8 files changed: +48 -25 lines

CMakeLists.txt
cmake/utils.cmake
pyproject.toml
requirements-build.txt
requirements-cuda.txt
requirements-openvino.txt
tests/models/decoder_only/language/test_big_models.py
vllm/platforms/cuda.py

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
@@ -49,7 +49,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx11
 # requirements.txt files and should be kept consistent. The ROCm torch
 # versions are derived from Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.4.0")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.5.0")
 set(TORCH_SUPPORTED_VERSION_ROCM "2.5.0")

 #
@@ -507,7 +507,7 @@ else()
 FetchContent_Declare(
   vllm-flash-attn
   GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
-  GIT_TAG 013f0c4fc47e6574060879d9734c1df8c5c273bd
+  GIT_TAG 5259c586c403a4e4d8bf69973c159b40cc346fb9
   GIT_PROGRESS TRUE
   # Don't share the vllm-flash-attn build between build types
   BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn
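
The version pinned here must stay in sync with pyproject.toml and the requirements files changed below, as the comment in the diff notes. As an aside, a hypothetical pre-build sanity check (an illustration, not part of this commit) could catch a mismatch early; the constant mirrors TORCH_SUPPORTED_VERSION_CUDA:

# Hypothetical pre-build guard (illustration only, not part of this commit).
from importlib.metadata import version

TORCH_SUPPORTED_VERSION_CUDA = "2.5.0"  # mirrors CMakeLists.txt

installed = version("torch").split("+")[0]  # drop local tag, e.g. "+cu124"
if installed != TORCH_SUPPORTED_VERSION_CUDA:
    raise RuntimeError(
        f"Found torch {installed}, but this build expects "
        f"{TORCH_SUPPORTED_VERSION_CUDA}; CMakeLists.txt and the "
        "requirements files must be kept consistent.")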

cmake/utils.cmake

Lines changed: 1 addition & 5 deletions
@@ -424,11 +424,7 @@ function (define_gpu_extension_target GPU_MOD_NAME)
   # Don't use `TORCH_LIBRARIES` for CUDA since it pulls in a bunch of
   # dependencies that are not necessary and may not be installed.
   if (GPU_LANGUAGE STREQUAL "CUDA")
-    if ("${CUDA_CUDA_LIB}" STREQUAL "")
-      set(CUDA_CUDA_LIB "${CUDA_CUDA_LIBRARY}")
-    endif()
-    target_link_libraries(${GPU_MOD_NAME} PRIVATE ${CUDA_CUDA_LIB}
-      ${CUDA_LIBRARIES})
+    target_link_libraries(${GPU_MOD_NAME} PRIVATE CUDA::cudart CUDA::cuda_driver)
   else()
     target_link_libraries(${GPU_MOD_NAME} PRIVATE ${TORCH_LIBRARIES})
   endif()

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ requires = [
     "packaging",
     "setuptools>=61",
     "setuptools-scm>=8.0",
-    "torch == 2.4.0",
+    "torch == 2.5.0",
    "wheel",
     "jinja2",
 ]

requirements-build.txt

Lines changed: 1 addition & 1 deletion
@@ -4,6 +4,6 @@ ninja
 packaging
 setuptools>=61
 setuptools-scm>=8
-torch==2.4.0
+torch==2.5.0
 wheel
 jinja2

requirements-cuda.txt

Lines changed: 3 additions & 3 deletions
@@ -4,7 +4,7 @@
 # Dependencies for NVIDIA GPUs
 ray >= 2.9
 nvidia-ml-py # for pynvml package
-torch == 2.4.0
+torch == 2.5.0
 # These must be updated alongside torch
-torchvision == 0.19  # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-xformers == 0.0.27.post2; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch 2.4.0
+torchvision == 0.20  # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+xformers == 0.0.28.post2; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch 2.5.0
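
The xformers pin is guarded by PEP 508 environment markers, so pip only installs it on x86-64 Linux. A small sketch of the same condition in Python, for anyone verifying an environment by hand (the markers platform_system and platform_machine map to the platform module's functions of the same name):

import platform

# Mirrors the requirement marker:
#   platform_system == 'Linux' and platform_machine == 'x86_64'
needs_xformers = (platform.system() == "Linux"
                  and platform.machine() == "x86_64")
print(f"xformers 0.0.28.post2 applies on this machine: {needs_xformers}")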

requirements-openvino.txt

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 # Common dependencies
 -r requirements-common.txt

-torch == 2.4.0  # should be aligned with "common" vLLM torch version
+torch == 2.5.0  # should be aligned with "common" vLLM torch version
 openvino >= 2024.4.0 # since 2024.4.0 both CPU and GPU support Paged Attention

 optimum @ git+https://github.com/huggingface/optimum.git@main # latest optimum is used to support latest transformers version

tests/models/decoder_only/language/test_big_models.py

Lines changed: 34 additions & 12 deletions
@@ -8,7 +8,7 @@

 from vllm.platforms import current_platform

-from ...utils import check_outputs_equal
+from ...utils import check_logprobs_close, check_outputs_equal

 MODELS = [
     "meta-llama/Llama-2-7b-hf",
@@ -43,18 +43,40 @@ def test_models(
     dtype: str,
     max_tokens: int,
 ) -> None:
-    with hf_runner(model, dtype=dtype) as hf_model:
-        hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)

-    with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model:
-        vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
-
-    check_outputs_equal(
-        outputs_0_lst=hf_outputs,
-        outputs_1_lst=vllm_outputs,
-        name_0="hf",
-        name_1="vllm",
-    )
+    if model == "openbmb/MiniCPM3-4B":
+        # the output becomes slightly different when upgrading to
+        # pytorch 2.5 . Changing to logprobs checks instead of exact
+        # output checks.
+        NUM_LOG_PROBS = 8
+        with hf_runner(model, dtype=dtype) as hf_model:
+            hf_outputs = hf_model.generate_greedy_logprobs_limit(
+                example_prompts, max_tokens, NUM_LOG_PROBS)
+
+        with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model:
+            vllm_outputs = vllm_model.generate_greedy_logprobs(
+                example_prompts, max_tokens, NUM_LOG_PROBS)
+
+        check_logprobs_close(
+            outputs_0_lst=hf_outputs,
+            outputs_1_lst=vllm_outputs,
+            name_0="hf",
+            name_1="vllm",
+        )
+    else:
+        with hf_runner(model, dtype=dtype) as hf_model:
+            hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)
+
+        with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model:
+            vllm_outputs = vllm_model.generate_greedy(example_prompts,
+                                                      max_tokens)
+
+        check_outputs_equal(
+            outputs_0_lst=hf_outputs,
+            outputs_1_lst=vllm_outputs,
+            name_0="hf",
+            name_1="vllm",
+        )


 @pytest.mark.parametrize("model", MODELS)
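
For context on the switch above: check_outputs_equal demands byte-identical generations, while check_logprobs_close tolerates the small numerical drift the new torch release introduces. A simplified, hypothetical sketch of the closeness idea (vLLM's real helper in the tests' shared utils module is more involved): two greedy runs agree at a position if they picked the same token, or if each side's pick still ranks in the other side's top-k logprobs.

from typing import Dict, List, Tuple

# Each output: (token_ids, text, per-position mapping of token_id -> logprob).
Output = Tuple[List[int], str, List[Dict[int, float]]]

def logprobs_close(out_0: Output, out_1: Output) -> bool:
    """Sketch only: accept divergent tokens that stay in both top-k sets."""
    ids_0, _, top_0 = out_0
    ids_1, _, top_1 = out_1
    for pos, (tok_0, tok_1) in enumerate(zip(ids_0, ids_1)):
        if tok_0 == tok_1:
            continue  # exact match at this position
        # A divergence is tolerated only if each run's token is still among
        # the other run's top-k candidates; otherwise the runs disagree.
        if tok_0 not in top_1[pos] or tok_1 not in top_0[pos]:
            return False
    return True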

vllm/platforms/cuda.py

Lines changed: 5 additions & 0 deletions
@@ -7,6 +7,7 @@
 from typing import Callable, List, Tuple, TypeVar

 import pynvml
+import torch
 from typing_extensions import ParamSpec

 from vllm.logger import init_logger
@@ -26,6 +27,10 @@
     " and cause errors. See https://pypi.org/project/pynvml "
     "for more information.")

+# pytorch 2.5 uses cudnn sdpa by default, which will cause crash on some models
+# see https://github.com/huggingface/diffusers/issues/9704 for details
+torch.backends.cuda.enable_cudnn_sdp(False)
+
 # NVML utils
 # Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`,
 # all the related functions work on real physical device ids.
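
The module-level enable_cudnn_sdp(False) call above switches the cuDNN SDPA backend off globally as soon as vLLM's CUDA platform module is imported. For reference, PyTorch (>= 2.3) also offers a scoped alternative via the torch.nn.attention.sdpa_kernel context manager; a minimal sketch, assuming a CUDA device is available (an illustration, not what this commit does):

import torch
from torch.nn.attention import SDPBackend, sdpa_kernel

q = k = v = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)

# Restrict SDPA to the flash and memory-efficient backends inside this block,
# sidestepping the cuDNN backend implicated in the crashes referenced above.
with sdpa_kernel([SDPBackend.FLASH_ATTENTION, SDPBackend.EFFICIENT_ATTENTION]):
    out = torch.nn.functional.scaled_dot_product_attention(q, k, v)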

0 commit comments