From ca81ff5196b2fa82b7a9d553cd6e30eab9e72aca Mon Sep 17 00:00:00 2001 From: youkaichao Date: Thu, 4 Apr 2024 10:26:19 -0700 Subject: [PATCH] [Core] manage nccl via a pypi package & upgrade to pt 2.2.1 (#3805) --- .github/workflows/publish.yml | 2 +- CMakeLists.txt | 2 +- Dockerfile | 10 +++++++--- pyproject.toml | 2 +- requirements-build.txt | 2 +- requirements.txt | 5 +++-- setup.py | 10 ++++++++++ vllm/model_executor/parallel_utils/pynccl.py | 14 ++++++++++++-- 8 files changed, 36 insertions(+), 11 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 5211dc180798e..2db687a287ef1 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -49,7 +49,7 @@ jobs: matrix: os: ['ubuntu-20.04'] python-version: ['3.8', '3.9', '3.10', '3.11'] - pytorch-version: ['2.1.2'] # Must be the most recent version that meets requirements.txt. + pytorch-version: ['2.2.1'] # Must be the most recent version that meets requirements.txt. cuda-version: ['11.8', '12.1'] steps: diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d0cf730de973..1845151181284 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx11 # requirements.txt files and should be kept consistent. 
The ROCm torch # versions are derived from Dockerfile.rocm # -set(TORCH_SUPPORTED_VERSION_CUDA "2.1.2") +set(TORCH_SUPPORTED_VERSION_CUDA "2.2.1") set(TORCH_SUPPORTED_VERSION_ROCM_5X "2.0.1") set(TORCH_SUPPORTED_VERSION_ROCM_6X "2.1.1") diff --git a/Dockerfile b/Dockerfile index f975530e09407..f2f5e513341f4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,6 +24,13 @@ RUN --mount=type=cache,target=/root/.cache/pip \ COPY requirements-dev.txt requirements-dev.txt RUN --mount=type=cache,target=/root/.cache/pip \ pip install -r requirements-dev.txt + +# cuda arch list used by torch +# can be useful for both `dev` and `test` +# explicitly set the list to avoid issues with torch 2.2 +# see https://github.com/pytorch/pytorch/pull/123243 +ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX' +ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} #################### BASE BUILD IMAGE #################### @@ -47,9 +54,6 @@ COPY requirements.txt requirements.txt COPY pyproject.toml pyproject.toml COPY vllm/__init__.py vllm/__init__.py -# cuda arch list used by torch -ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX' -ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} # max jobs used by Ninja to build extensions ARG max_jobs=2 ENV MAX_JOBS=${max_jobs} diff --git a/pyproject.toml b/pyproject.toml index b7ad8b8ca7e46..2a00d6796ee02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ requires = [ "ninja", "packaging", "setuptools >= 49.4.0", - "torch == 2.1.2", + "torch == 2.2.1", "wheel", ] build-backend = "setuptools.build_meta" diff --git a/requirements-build.txt b/requirements-build.txt index a8efcde590bbf..2bc07fb152aac 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -3,5 +3,5 @@ cmake>=3.21 ninja packaging setuptools>=49.4.0 -torch==2.1.2 +torch==2.2.1 wheel diff --git a/requirements.txt b/requirements.txt index df0f6dd1ee3ca..4faf0250ff0af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,11 +4,11 @@ psutil ray >= 2.9 sentencepiece # 
Required for LLaMA tokenizer. numpy -torch == 2.1.2 +torch == 2.2.1 requests py-cpuinfo transformers >= 4.39.1 # Required for StarCoder2 & Llava. -xformers == 0.0.23.post1 # Required for CUDA 12.1. +xformers == 0.0.25 # Requires PyTorch 2.2.1. fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. @@ -17,3 +17,4 @@ pynvml == 11.5.0 triton >= 2.1.0 outlines == 0.0.34 tiktoken == 0.6.0 # Required for DBRX tokenizer +vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library diff --git a/setup.py b/setup.py index e80226faa4807..d64af4283863c 100644 --- a/setup.py +++ b/setup.py @@ -328,6 +328,16 @@ def get_requirements() -> List[str]: if _is_cuda(): with open(get_path("requirements.txt")) as f: requirements = f.read().strip().split("\n") + cuda_major = torch.version.cuda.split(".")[0] + modified_requirements = [] + for req in requirements: + if "vllm-nccl-cu12" in req: + modified_requirements.append( + req.replace("vllm-nccl-cu12", + f"vllm-nccl-cu{cuda_major}")) + else: + modified_requirements.append(req) + requirements = modified_requirements elif _is_hip(): with open(get_path("requirements-rocm.txt")) as f: requirements = f.read().strip().split("\n") diff --git a/vllm/model_executor/parallel_utils/pynccl.py b/vllm/model_executor/parallel_utils/pynccl.py index 2aed70f05e067..f7f83528cd06c 100644 --- a/vllm/model_executor/parallel_utils/pynccl.py +++ b/vllm/model_executor/parallel_utils/pynccl.py @@ -21,6 +21,7 @@ import ctypes import datetime +import glob import os # ===================== import region ===================== @@ -34,18 +35,27 @@ so_file = os.environ.get("VLLM_NCCL_SO_PATH", "") +# check if we have vllm-managed nccl +vllm_nccl_path = None +if torch.version.cuda is not None: + cuda_major = torch.version.cuda.split(".")[0] + path = os.path.expanduser( + f"~/.config/vllm/nccl/cu{cuda_major}/libnccl.so.*") + files = glob.glob(path) + vllm_nccl_path = files[0] if files else None + # manually load the nccl library if so_file: logger.info( 
f"Loading nccl from environment variable VLLM_NCCL_SO_PATH={so_file}") else: if torch.version.cuda is not None: - so_file = "libnccl.so.2" + so_file = vllm_nccl_path or "libnccl.so.2" elif torch.version.hip is not None: so_file = "librccl.so.1" else: raise ValueError("NCCL only supports CUDA and ROCm backends.") - logger.debug(f"Loading nccl from library {so_file}") + logger.info(f"Loading nccl from library {so_file}") try: nccl = ctypes.CDLL(so_file)