Skip to content

Commit 4df1cf4

Browse files
maktukmakbigPYJ1151
authored and committed
CPU only build (vllm-project#9)
1 parent 8fb0698 commit 4df1cf4

17 files changed

+1143
-55
lines changed

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ sanitizer:
2828
py_install:
2929
VLLM_BUILD_CPU_OPS=1 MAX_JOBS=JOBS pip install --no-build-isolation -v -e .
3030

31+
py_install_cpu:
32+
VLLM_BUILD_CPU_ONLY=1 MAX_JOBS=JOBS pip install --no-build-isolation -v -e .
33+
34+
install_vllm:
35+
MAX_JOBS=JOBS pip install -v git+https://github.com/intel-sandbox/vllm-xpu.git@dev -f https://download.pytorch.org/whl/torch_stable.html
36+
3137
package:
3238
VLLM_BUILD_CPU_OPS=1 MAX_JOBS=JOBS python setup.py bdist_wheel
3339
echo "Wheel package is saved in ./dist/"

cpu.Dockerfile

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
FROM python:3.10 AS dev
2+
3+
RUN apt-get update -y \
4+
&& apt-get install -y python3-pip
5+
6+
WORKDIR /workspace
7+
8+
# install build and runtime dependencies
9+
COPY requirements-cpu.txt requirements-cpu.txt
10+
RUN --mount=type=cache,target=/root/.cache/pip \
11+
pip install -r requirements-cpu.txt
12+
13+
# install development dependencies
14+
COPY requirements-dev.txt requirements-dev.txt
15+
RUN --mount=type=cache,target=/root/.cache/pip \
16+
pip install -r requirements-dev.txt
17+
18+
# image to build pytorch extensions
19+
FROM dev AS build
20+
21+
# install build dependencies
22+
COPY requirements-build-cpu.txt requirements-build-cpu.txt
23+
RUN --mount=type=cache,target=/root/.cache/pip \
24+
pip install -r requirements-build-cpu.txt
25+
26+
# copy input files
27+
COPY csrc csrc
28+
COPY setup.py setup.py
29+
COPY requirements-cpu.txt requirements-cpu.txt
30+
COPY pyproject.toml pyproject.toml
31+
COPY vllm/__init__.py vllm/__init__.py
32+
33+
# max jobs used by Ninja to build extensions
34+
# declare the build argument so $max_jobs is defined in this stage;
# override with: docker build --build-arg max_jobs=<N>
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
35+
RUN python3 setup.py build_ext --inplace
36+
37+
# image to run unit testing suite
38+
FROM dev AS test
39+
40+
# copy pytorch extensions separately to avoid having to rebuild
41+
# when python code changes
42+
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
43+
COPY tests tests
44+
COPY vllm vllm
45+
46+
ENTRYPOINT ["python3", "-m", "pytest", "tests"]
47+
48+
# runtime base image: a plain Python base is sufficient for the CPU-only build
49+
# This stage must be named vllm-base: the vllm and vllm-openai stages build
# FROM vllm-base, and the name "dev" is already used by the first stage.
FROM python:3.10 AS vllm-base
50+
51+
# make sure pip is available in the runtime image
52+
RUN apt-get update -y \
53+
&& apt-get install -y python3-pip
54+
55+
WORKDIR /workspace
56+
COPY requirements-cpu.txt requirements-cpu.txt
57+
RUN --mount=type=cache,target=/root/.cache/pip \
58+
pip install -r requirements-cpu.txt
59+
60+
FROM vllm-base AS vllm
61+
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
62+
COPY vllm vllm
63+
64+
EXPOSE 8000
65+
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]
66+
67+
# openai api server alternative
68+
FROM vllm-base AS vllm-openai
69+
# install additional dependencies for openai api server
70+
RUN --mount=type=cache,target=/root/.cache/pip \
71+
pip install accelerate fschat
72+
73+
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
74+
COPY vllm vllm
75+
76+
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
77+

csrc/dispatch_utils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,14 @@
1414
#define VLLM_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...) \
1515
AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__))
1616

17+
// VLLM_DISPATCH_TO_CUDA_CASE(BASENAME, ...):
// When VLLM_BUILD_CPU_ONLY is defined, the macro expands to nothing, so no
// CUDA case is emitted and BASENAME is never referenced. Otherwise it emits a
// `case c10::DeviceType::CUDA` that returns BASENAME(__VA_ARGS__).
#ifdef VLLM_BUILD_CPU_ONLY
18+
#define VLLM_DISPATCH_TO_CUDA_CASE(BASENAME, ...)
19+
#else
1720
#define VLLM_DISPATCH_TO_CUDA_CASE(BASENAME, ...) \
1821
case c10::DeviceType::CUDA: { \
1922
return BASENAME(__VA_ARGS__); \
2023
}
24+
#endif
2125

2226
#ifdef VLLM_BUILD_CPU_OPS
2327
#define VLLM_DISPATCH_TO_CPU_CASE(BASENAME, ...) \

csrc/pybind.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,12 @@ void gptq_shuffle_dispatch(
8787
VLLM_DISPATCH_DEVICES(q_weight.device(), gptq_shuffle, q_weight, q_perm);
8888
}
8989

90+
#ifdef VLLM_BUILD_CPU_ONLY
91+
// CPU-only definition of get_device_attribute (compiled only when
// VLLM_BUILD_CPU_ONLY is defined). Both arguments are ignored and the
// function always returns the fixed value 94387.
// NOTE(review): the meaning of 94387 is not shown here — confirm what callers
// expect and consider replacing it with a named constant.
int get_device_attribute(
92+
int attribute,
93+
int device_id) { return 94387; }
94+
#endif
95+
9096
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
9197
// vLLM custom ops
9298
pybind11::module ops = m.def_submodule("ops", "vLLM custom operators");

Dockerfile renamed to gpu.Dockerfile

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ RUN apt-get update -y \
1010
WORKDIR /workspace
1111

1212
# install build and runtime dependencies
13-
COPY requirements.txt requirements.txt
13+
COPY requirements-gpu.txt requirements-gpu.txt
1414
RUN --mount=type=cache,target=/root/.cache/pip \
15-
pip install -r requirements.txt
15+
pip install -r requirements-gpu.txt
1616

1717
# install development dependencies
1818
COPY requirements-dev.txt requirements-dev.txt
@@ -25,14 +25,14 @@ RUN --mount=type=cache,target=/root/.cache/pip \
2525
FROM dev AS build
2626

2727
# install build dependencies
28-
COPY requirements-build.txt requirements-build.txt
28+
COPY requirements-build-gpu.txt requirements-build-gpu.txt
2929
RUN --mount=type=cache,target=/root/.cache/pip \
30-
pip install -r requirements-build.txt
30+
pip install -r requirements-build-gpu.txt
3131

3232
# copy input files
3333
COPY csrc csrc
3434
COPY setup.py setup.py
35-
COPY requirements.txt requirements.txt
35+
COPY requirements-gpu.txt requirements-gpu.txt
3636
COPY pyproject.toml pyproject.toml
3737
COPY vllm/__init__.py vllm/__init__.py
3838

@@ -75,9 +75,9 @@ RUN apt-get update -y \
7575
&& apt-get install -y python3-pip
7676

7777
WORKDIR /workspace
78-
COPY requirements.txt requirements.txt
78+
COPY requirements-gpu.txt requirements-gpu.txt
7979
RUN --mount=type=cache,target=/root/.cache/pip \
80-
pip install -r requirements.txt
80+
pip install -r requirements-gpu.txt
8181
#################### RUNTIME BASE IMAGE ####################
8282

8383

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ requires = [
44
"ninja",
55
"packaging",
66
"setuptools >= 49.4.0",
7-
"torch == 2.1.2",
7+
"torch == 2.1.2+cpu",
88
"wheel",
99
]
1010
build-backend = "setuptools.build_meta"

requirements-build-cpu.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Should be mirrored in pyproject.toml
2+
ninja
3+
packaging
4+
setuptools>=49.4.0
5+
# "+cpu" local-version wheels are not published on PyPI; resolve them from
# the PyTorch CPU wheel index.
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.1.2+cpu
6+
wheel
File renamed without changes.

requirements-cpu.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
ninja # For faster builds.
2+
psutil
3+
ray >= 2.5.1
4+
pandas # Required for Ray data.
5+
pyarrow # Required for Ray data.
6+
pybind11
7+
sentencepiece # Required for LLaMA tokenizer.
8+
numpy
9+
einops # Required for phi-1_5
10+
# "+cpu" local-version wheels are not published on PyPI; resolve them from
# the PyTorch CPU wheel index.
--extra-index-url https://download.pytorch.org/whl/cpu
torch == 2.1.2+cpu
11+
transformers >= 4.34.0 # Required for Mistral.
12+
fastapi
13+
uvicorn[standard]
14+
pydantic == 1.10.13 # Required for OpenAI server.
15+
aioprometheus[starlette]
File renamed without changes.

0 commit comments

Comments
 (0)