Skip to content

Commit

Permalink
update docker file
Browse files Browse the repository at this point in the history
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
  • Loading branch information
sywangyi committed Mar 8, 2024
1 parent b3989cf commit ea03af5
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 9 deletions.
35 changes: 31 additions & 4 deletions Dockerfile_intel
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Rust builder
FROM lukemathwalker/cargo-chef:latest-rust-1.71 AS chef
FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef
WORKDIR /usr/src

ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
Expand Down Expand Up @@ -35,6 +34,7 @@ COPY router router
COPY launcher launcher
RUN cargo build --release


# Text Generation Inference base image for Intel.
# The stage keeps the name "base"; the AS keyword is upper-cased to match the
# casing of FROM, as in the cargo-chef stage earlier in this file.
FROM intel/intel-extension-for-pytorch:2.1.10-xpu AS base

Expand All @@ -47,22 +47,49 @@ RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.
# Register Intel's oneAPI APT repository: the downloaded public key is
# dearmored into a dedicated keyring file, then a sources.list entry that
# references that keyring via signed-by is written.
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor \
    | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
    | tee /etc/apt/sources.list.d/oneAPI.list

RUN apt-get update && apt install -y intel-basekit xpu-smi
# Install the oneAPI base toolkit, xpu-smi, and the cmake / python3-dev
# packages. apt-get is used for the install step as well as the update step,
# because the `apt` front end does not promise a stable command-line
# interface for scripted use.
RUN apt-get update && apt-get install -y intel-basekit xpu-smi cmake python3-dev

# Text Generation Inference base env.
# ENV values apply to every later build step in this stage and to containers
# started from the resulting image.
ENV HUGGINGFACE_HUB_CACHE=/data
ENV HF_HUB_ENABLE_HF_TRANSFER=1
ENV PORT=80


WORKDIR /usr/src
# Fetch the IPEX and PyTorch sources; both are installed from source by later
# RUN steps in this stage.
# IPEX: a local branch xpu_main is created from origin/xpu-main, i.e. whatever
# that remote branch points at when the image is built.
RUN git clone https://github.com/intel/intel-extension-for-pytorch \
    && cd intel-extension-for-pytorch \
    && git checkout -b xpu_main origin/xpu-main

# PyTorch: checked out at a fixed commit, then patched with the 00*.patch
# files from the IPEX checkout's torch_patches directory.
RUN git clone https://github.com/pytorch/pytorch.git \
    && cd pytorch \
    && git checkout 209f2fa8ff86652f67d75c2f19bf9cb9942fd018 \
    && git apply /usr/src/intel-extension-for-pytorch/torch_patches/00*.patch

# Install server
COPY proto proto
COPY server server
COPY server/Makefile server/Makefile
RUN cd server && \
make gen-server && \
pip install -r requirements_common.txt && \
pip install ".[accelerate, peft]" --no-cache-dir
pip install ".[accelerate, peft, outlines]" --no-cache-dir

# oneAPI toolchain environment, written out as fixed values.
# Every path is tied to specific component versions (ccl/mpi 2021.11,
# compiler/mkl 2024.0) under /opt/intel/oneapi, so these lines must be updated
# together if the installed intel-basekit version changes.
# NOTE(review): the values look like a snapshot of what oneAPI's environment
# script would export — confirm against the installed toolkit.
ENV CCL_ROOT=/opt/intel/oneapi/ccl/2021.11
ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/2021.11
ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric
ENV DIAGUTIL_PATH=/opt/intel/oneapi/compiler/2024.0/etc/compiler/sys_check/sys_check.sh
ENV CCL_CONFIGURATION=cpu_gpu_dpcpp
# Duplicate mpi man entry removed; the trailing ':' is kept on purpose.
ENV MANPATH=/opt/intel/oneapi/mpi/2021.11/share/man:/opt/intel/oneapi/compiler/2024.0/documentation/en/man/common:
ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/2024.0/lib/cmake:/opt/intel/oneapi/compiler/2024.0
ENV CMPLR_ROOT=/opt/intel/oneapi/compiler/2024.0
ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/2021.11/lib:/opt/intel/oneapi/ccl/2021.11/lib/:/opt/intel/oneapi/mkl/2024.0/lib/:/opt/intel/oneapi/compiler/2024.0/lib
ENV OCL_ICD_FILENAMES=libintelocl_emu.so:libalteracl.so:/opt/intel/oneapi/compiler/2024.0/lib/libintelocl.so
# Duplicate mpi.jar entry removed.
ENV CLASSPATH=/opt/intel/oneapi/mpi/2021.11/share/java/mpi.jar
# No trailing ':' here: an empty LD_LIBRARY_PATH element makes the dynamic
# loader search the current working directory for shared libraries.
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/2021.11/lib/:/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/2021.11/lib:/opt/intel/oneapi/mkl/2024.0/lib:/opt/intel/oneapi/compiler/2024.0/opt/compiler/lib:/opt/intel/oneapi/compiler/2024.0/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64
ENV MKLROOT=/opt/intel/oneapi/mkl/2024.0
ENV NLSPATH=/opt/intel/oneapi/mkl/2024.0/share/locale/%l_%t/%N:/opt/intel/oneapi/compiler/2024.0/lib/locale/%l_%t/%N
# Repeated libfabric/bin entry removed; the lookup order of the remaining
# directories is unchanged.
# NOTE(review): PATH is overwritten rather than extended with ${PATH}, so any
# directories the base image added to PATH are dropped — confirm this is intended.
ENV PATH=/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/2021.11/bin:/opt/intel/oneapi/mkl/2024.0/bin/:/opt/intel/oneapi/compiler/2024.0/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV CPATH=/opt/intel/oneapi/mpi/2021.11/include:/opt/intel/oneapi/ccl/2021.11/include:/opt/intel/oneapi/mkl/2024.0/include
ENV CCL_ZE_IPC_EXCHANGE=sockets


# Remove the installed torch package, then install PyTorch from the local
# source checkout (submodules are fetched first).
RUN pip uninstall -y torch \
    && cd pytorch \
    && git submodule update --init --recursive \
    && python setup.py install

# Same procedure for IPEX. The variables prefixed to the final command are
# passed only to that setup.py invocation.
# NOTE(review): USE_AOT_DEVLIST='pvc' presumably restricts ahead-of-time
# compilation to a single GPU family — confirm against the IPEX build docs.
RUN pip uninstall -y intel-extension-for-pytorch \
    && cd intel-extension-for-pytorch \
    && git submodule update --init --recursive \
    && USE_AOT_DEVLIST='pvc' BUILD_SEPARATE_OPS=ON BUILD_WITH_CPU=ON USE_XETLA=ON python setup.py install

# Install benchmarker
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
Expand Down
2 changes: 1 addition & 1 deletion server/text_generation_server/models/flash_mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
SLIDING_WINDOW_BLOCKS: Optional[int] = None
from text_generation_server.utils.import_utils import IS_XPU_SYSTEM

MEM_POOL = torch.cuda.graph_pool_handle()
MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None


def set_sliding_window(sliding_window: int, sliding_window_blocks: int):
Expand Down
2 changes: 1 addition & 1 deletion server/text_generation_server/models/globals.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torch
import os

MEM_POOL = torch.cuda.graph_pool_handle()
MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None
# This is overridden by the cli
ENABLE_CUDA_GRAPHS = os.getenv("ENABLE_CUDA_GRAPHS", "false").lower() in {"1", "true"}
3 changes: 1 addition & 2 deletions server/text_generation_server/utils/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,11 +713,10 @@ def forward(self, hidden_states, residual=None):
residual = hidden_states
out = ipex.llm.modules.RMSNorm.apply(
hidden_states,
[hidden_states.size(-1)],
self.weight,
self.variance_epsilon,
)
return out[0], residual
return out, residual
elif hidden_states.shape[-1] > 8192:
if residual is not None:
hidden_states += residual
Expand Down
2 changes: 1 addition & 1 deletion server/text_generation_server/utils/paged_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def reshape_and_cache(
cache_ops.reshape_and_cache(key, value, key_cache, value_cache, slots)
elif IS_XPU_SYSTEM:
ipex.llm.modules.PagedAttention.reshape_and_cache(
key, value, key_cache, value_cache, slots
key, value, key_cache, value_cache, slots.to(torch.int64)
)


Expand Down

0 comments on commit ea03af5

Please sign in to comment.