From 24f60a54f42076e0bfa49fde113756bf4e95f9ef Mon Sep 17 00:00:00 2001
From: AguirreNicolas <37890346+AguirreNicolas@users.noreply.github.com>
Date: Thu, 7 Dec 2023 16:00:32 -0300
Subject: [PATCH] [Docker] Adding number of nvcc_threads during build as envar
 (#1893)

---
 Dockerfile                                    | 3 +++
 docs/source/serving/deploying_with_docker.rst | 2 +-
 setup.py                                      | 3 ++-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 7fd8933957334..b1be5fb9e3b83 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -32,6 +32,9 @@ COPY vllm/__init__.py vllm/__init__.py
 
 # max jobs used by Ninja to build extensions
 ENV MAX_JOBS=$max_jobs
+# number of threads used by nvcc
+ARG nvcc_threads=8
+ENV NVCC_THREADS=$nvcc_threads
 RUN python3 setup.py build_ext --inplace
 
 # image to run unit testing suite
diff --git a/docs/source/serving/deploying_with_docker.rst b/docs/source/serving/deploying_with_docker.rst
index e1daecc5cdc2c..3afefecc0c0e5 100644
--- a/docs/source/serving/deploying_with_docker.rst
+++ b/docs/source/serving/deploying_with_docker.rst
@@ -29,7 +29,7 @@ You can build and run vLLM from source via the provided dockerfile. To build vLL
 
 .. code-block:: console
 
-    $ DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai --build-arg max_jobs=8
+    $ DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai # optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2
 
 To run vLLM:
 
diff --git a/setup.py b/setup.py
index 2b040e88f0aa4..95a7d7060fefc 100644
--- a/setup.py
+++ b/setup.py
@@ -138,7 +138,8 @@ def get_torch_arch_list() -> Set[str]:
 
 # Use NVCC threads to parallelize the build.
 if nvcc_cuda_version >= Version("11.2"):
-    num_threads = min(os.cpu_count(), 8)
+    nvcc_threads = int(os.getenv("NVCC_THREADS", 8))
+    num_threads = min(os.cpu_count(), nvcc_threads)
     NVCC_FLAGS += ["--threads", str(num_threads)]
 
 ext_modules = []