bigPYJ1151
diff --git a/‎Makefile
Lines changed: 6 additions & 0 deletions b/‎Makefile
Lines changed: 6 additions & 0 deletions
diff --git a/‎cpu.Dockerfile
Lines changed: 77 additions & 0 deletions b/‎cpu.Dockerfile
Lines changed: 77 additions & 0 deletions
diff --git a/‎csrc/dispatch_utils.h
Lines changed: 4 additions & 0 deletions b/‎csrc/dispatch_utils.h
Lines changed: 4 additions & 0 deletions
diff --git a/‎csrc/pybind.cpp
Lines changed: 6 additions & 0 deletions b/‎csrc/pybind.cpp
Lines changed: 6 additions & 0 deletions
diff --git a/‎Dockerfile renamed to ‎gpu.Dockerfile
Lines changed: 7 additions & 7 deletions b/‎Dockerfile renamed to ‎gpu.Dockerfile
Lines changed: 7 additions & 7 deletions
diff --git a/‎pyproject.toml
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/‎requirements-build-cpu.txt
Lines changed: 6 additions & 0 deletions b/‎requirements-build-cpu.txt
Lines changed: 6 additions & 0 deletions
diff --git a/‎requirements-build.txt renamed to ‎requirements-build-gpu.txt b/‎requirements-build.txt renamed to ‎requirements-build-gpu.txt
diff --git a/‎requirements-cpu.txt
Lines changed: 15 additions & 0 deletions b/‎requirements-cpu.txt
Lines changed: 15 additions & 0 deletions
diff --git a/‎requirements.txt renamed to ‎requirements-gpu.txt b/‎requirements.txt renamed to ‎requirements-gpu.txt
@@ -28,6 +28,12 @@ sanitizer:
 py_install:
 	VLLM_BUILD_CPU_OPS=1 MAX_JOBS=JOBS pip install --no-build-isolation  -v -e .
 
+# Command targets: neither one produces a file named after itself, so declare
+# them phony to keep a stray file of the same name from disabling them.
+.PHONY: py_install_cpu install_vllm
+
+# Editable install of the working tree with VLLM_BUILD_CPU_ONLY=1 exported to
+# the build.
+# NOTE(review): MAX_JOBS is given the literal string "JOBS", as in the
+# neighbouring py_install and package recipes; confirm whether $(JOBS) was
+# intended.
+py_install_cpu:
+	VLLM_BUILD_CPU_ONLY=1 MAX_JOBS=JOBS pip install --no-build-isolation  -v -e .
+
+# Install vLLM from the dev branch of the vllm-xpu repository; -f adds the
+# PyTorch wheel listing as an extra location for pip to find packages.
+install_vllm:
+	MAX_JOBS=JOBS pip install -v git+https://github.com/intel-sandbox/vllm-xpu.git@dev -f https://download.pytorch.org/whl/torch_stable.html
+
 package:
 	VLLM_BUILD_CPU_OPS=1 MAX_JOBS=JOBS python setup.py bdist_wheel
 	echo "Wheel package is saved in ./dist/"
 
@@ -0,0 +1,77 @@
+# CPU image for vLLM. Stages:
+#   dev         - base image plus runtime and development dependencies
+#   build       - dev plus build dependencies; compiles the extensions in place
+#   test        - dev plus the compiled extensions; runs the pytest suite
+#   vllm-base   - runtime base with only requirements-cpu.txt installed
+#   vllm        - vllm-base plus the package; serves vllm.entrypoints.api_server
+#   vllm-openai - vllm-base plus extras; serves the OpenAI-style API server
+FROM python:3.10 AS dev
+
+RUN apt-get update -y \
+    && apt-get install -y python3-pip
+
+WORKDIR /workspace
+
+# install build and runtime dependencies
+# requirements-cpu.txt pins torch to a "+cpu" local version, so pip is also
+# pointed at the PyTorch wheel listing (same -f URL the Makefile's
+# install_vllm target uses).
+COPY requirements-cpu.txt requirements-cpu.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements-cpu.txt \
+        -f https://download.pytorch.org/whl/torch_stable.html
+
+# install development dependencies
+COPY requirements-dev.txt requirements-dev.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements-dev.txt
+
+# image to build pytorch extensions
+FROM dev AS build
+
+# install build dependencies
+# (also pins torch "+cpu", hence the same -f wheel listing)
+COPY requirements-build-cpu.txt requirements-build-cpu.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements-build-cpu.txt \
+        -f https://download.pytorch.org/whl/torch_stable.html
+
+# copy input files
+COPY csrc csrc
+COPY setup.py setup.py
+COPY requirements-cpu.txt requirements-cpu.txt
+COPY pyproject.toml pyproject.toml
+COPY vllm/__init__.py vllm/__init__.py
+
+# max jobs used by Ninja to build extensions
+# Declared as a build argument so that $max_jobs is actually defined;
+# override with: docker build --build-arg max_jobs=<n> ...
+ARG max_jobs=2
+ENV MAX_JOBS=${max_jobs}
+# NOTE(review): the Makefile's py_install_cpu target exports
+# VLLM_BUILD_CPU_ONLY=1 for its build; confirm whether this step needs the
+# same variable.
+RUN python3 setup.py build_ext --inplace
+
+# image to run unit testing suite
+FROM dev AS test
+
+# copy pytorch extensions separately to avoid having to rebuild
+# when python code changes
+COPY --from=build /workspace/vllm/*.so /workspace/vllm/
+COPY tests tests
+COPY vllm vllm
+
+ENTRYPOINT ["python3", "-m", "pytest", "tests"]
+
+# runtime base image: same Python base as dev, with only the runtime
+# dependencies installed. It must be named vllm-base because the vllm and
+# vllm-openai stages below build FROM it.
+FROM python:3.10 AS vllm-base
+
+RUN apt-get update -y \
+    && apt-get install -y python3-pip
+
+WORKDIR /workspace
+COPY requirements-cpu.txt requirements-cpu.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements-cpu.txt \
+        -f https://download.pytorch.org/whl/torch_stable.html
+
+# api server image
+FROM vllm-base AS vllm
+COPY --from=build /workspace/vllm/*.so /workspace/vllm/
+COPY vllm vllm
+
+EXPOSE 8000
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]
+
+# openai api server alternative
+FROM vllm-base AS vllm-openai
+# install additional dependencies for openai api server
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install accelerate fschat
+
+COPY --from=build /workspace/vllm/*.so /workspace/vllm/
+COPY vllm vllm
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
@@ -14,10 +14,14 @@
 #define VLLM_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...)                          \
   AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__))
 
+#ifdef VLLM_BUILD_CPU_ONLY  // CPU-only build: the CUDA case expands to nothing
+#define VLLM_DISPATCH_TO_CUDA_CASE(BASENAME, ...)  // intentionally empty
+#else  // otherwise: emit a CUDA case label that returns BASENAME(args...)
 #define VLLM_DISPATCH_TO_CUDA_CASE(BASENAME, ...)                              \
   case c10::DeviceType::CUDA: {                                                \
     return BASENAME(__VA_ARGS__);                                              \
   }
+#endif  // VLLM_BUILD_CPU_ONLY
 
 #ifdef VLLM_BUILD_CPU_OPS
 #define VLLM_DISPATCH_TO_CPU_CASE(BASENAME, ...)                               \
 
@@ -87,6 +87,12 @@ void gptq_shuffle_dispatch(
     VLLM_DISPATCH_DEVICES(q_weight.device(), gptq_shuffle, q_weight, q_perm);
 }
 
+#ifdef VLLM_BUILD_CPU_ONLY  // stub definition compiled only for CPU-only builds
+int get_device_attribute(
+    int attribute,  // not read by this stub
+    int device_id) { return 94387; }  // device_id is not read either; NOTE(review): 94387 is an unexplained constant returned for every query - confirm what callers expect
+#endif  // VLLM_BUILD_CPU_ONLY
+
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   // vLLM custom ops
   pybind11::module ops = m.def_submodule("ops", "vLLM custom operators");
 
@@ -10,9 +10,9 @@ RUN apt-get update -y \
 WORKDIR /workspace
 
 # install build and runtime dependencies
-COPY requirements.txt requirements.txt
+COPY requirements-gpu.txt requirements-gpu.txt
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -r requirements.txt
+    pip install -r requirements-gpu.txt
 
 # install development dependencies
 COPY requirements-dev.txt requirements-dev.txt
@@ -25,14 +25,14 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 FROM dev AS build
 
 # install build dependencies
-COPY requirements-build.txt requirements-build.txt
+COPY requirements-build-gpu.txt requirements-build-gpu.txt
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -r requirements-build.txt
+    pip install -r requirements-build-gpu.txt
 
 # copy input files
 COPY csrc csrc
 COPY setup.py setup.py
-COPY requirements.txt requirements.txt
+COPY requirements-gpu.txt requirements-gpu.txt
 COPY pyproject.toml pyproject.toml
 COPY vllm/__init__.py vllm/__init__.py
 
@@ -75,9 +75,9 @@ RUN apt-get update -y \
     && apt-get install -y python3-pip
 
 WORKDIR /workspace
-COPY requirements.txt requirements.txt
+COPY requirements-gpu.txt requirements-gpu.txt
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -r requirements.txt
+    pip install -r requirements-gpu.txt
 #################### RUNTIME BASE IMAGE ####################
 
 
 
@@ -4,7 +4,7 @@ requires = [
     "ninja",
     "packaging",
     "setuptools >= 49.4.0",
-    "torch == 2.1.2",
+    "torch == 2.1.2+cpu",
     "wheel",
 ]
 build-backend = "setuptools.build_meta"
 
@@ -0,0 +1,6 @@
+# Should be mirrored in pyproject.toml
+ninja
+packaging
+setuptools>=49.4.0
+torch==2.1.2+cpu
+wheel
@@ -0,0 +1,15 @@
+ninja  # For faster builds.
+psutil
+ray >= 2.5.1
+pandas  # Required for Ray data.
+pyarrow  # Required for Ray data.
+pybind11
+sentencepiece  # Required for LLaMA tokenizer.
+numpy
+einops  # Required for phi-1_5
+torch == 2.1.2+cpu
+transformers >= 4.34.0  # Required for Mistral.
+fastapi
+uvicorn[standard]
+pydantic == 1.10.13  # Required for OpenAI server.
+aioprometheus[starlette]
Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@ requires = [`
`4`	`4`	`"ninja",`
`5`	`5`	`"packaging",`
`6`	`6`	`"setuptools >= 49.4.0",`
`7`		`- "torch == 2.1.2",`
	`7`	`+ "torch == 2.1.2+cpu",`
`8`	`8`	`"wheel",`
`9`	`9`	`]`
`10`	`10`	`build-backend = "setuptools.build_meta"`