Skip to content

Commit 587e0a4

Browse files
author
baishihao
committed
merge main
2 parents 6362c4a + 58b7fd4 commit 587e0a4

File tree

101 files changed

+4639
-4281
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+4639
-4281
lines changed

.github/workflows/docker-publish.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,25 @@ jobs:
9595
uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a
9696
with:
9797
context: .
98+
file: ./docker/Dockerfile
9899
push: ${{ github.event_name != 'pull_request' }}
99100
tags: ${{ steps.meta.outputs.tags }}
100101
labels: ${{ steps.meta.outputs.labels }}
101102
cache-from: type=gha
102103
cache-to: type=gha,mode=max
103104

105+
# Build and push specific Docker image for deepep
106+
# https://github.com/docker/build-push-action
107+
- name: Build and push deepep Docker image
108+
id: build-and-push-deepep
109+
uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a
110+
with:
111+
context: .
112+
file: ./docker/Dockerfile.deepep
113+
push: ${{ github.event_name != 'pull_request' }}
114+
tags: ghcr.io/modeltc/lightllm:main-deepep
115+
cache-from: type=gha
116+
cache-to: type=gha,mode=max
104117

105118
# Sign the resulting Docker image digest except on PRs.
106119
# This will only write to the public Rekor transparency log when the Docker

README.md

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<div align="center">
22
<picture>
3-
<img alt="LightLLM" src="assets/lightllm.drawio.png" width=90%>
3+
<img alt="LightLLM" src="assets/logo_new.png" width=90%>
44
</picture>
55
</div>
66

@@ -29,8 +29,7 @@ LightLLM is a Python-based LLM (Large Language Model) inference and serving fram
2929

3030
- [Install LightLLM](https://lightllm-en.readthedocs.io/en/latest/getting_started/installation.html)
3131
- [Quick Start](https://lightllm-en.readthedocs.io/en/latest/getting_started/quickstart.html)
32-
- [LLM Service](https://lightllm-en.readthedocs.io/en/latest/models/test.html#llama)
33-
- [VLM Service](https://lightllm-en.readthedocs.io/en/latest/models/test.html#llava)
32+
- [Tutorial](https://lightllm-en.readthedocs.io/en/latest/tutorial/deepseek_deployment.html)
3433

3534

3635
## Performance
@@ -45,23 +44,8 @@ Please refer to the [FAQ](https://lightllm-en.readthedocs.io/en/latest/faq.html)
4544

4645
We welcome any cooperation and contribution. If there is a project that requires LightLLM's support, please contact us via email or create a pull request.
4746

48-
49-
1. <details><summary> <b><a href=https://github.com/LazyAGI/LazyLLM>LazyLLM</a></b>: Easyest and lazyest way for building multi-agent LLMs applications.</summary>
50-
51-
Once you have installed `lightllm` and `lazyllm`, and then you can use the following code to build your own chatbot:
52-
53-
~~~python
54-
from lazyllm import TrainableModule, deploy, WebModule
55-
# Model will be download automatically if you have an internet connection
56-
m = TrainableModule('internlm2-chat-7b').deploy_method(deploy.lightllm)
57-
WebModule(m).start().wait()
58-
~~~
59-
60-
Documents: https://lazyllm.readthedocs.io/
61-
62-
</details>
63-
6447
Projects based on LightLLM or referenced LightLLM components:
48+
- [LazyLLM](https://github.com/LazyAGI/LazyLLM)
6549
- [LoongServe, Peking University](https://github.com/LoongServe/LoongServe)
6650
- [OmniKV, Ant Group](https://github.com/antgroup/OmniKV)
6751
- [vLLM](https://github.com/vllm-project/vllm) (some LightLLM's kernel used)

assets/logo_new.png

377 KB
Loading

build_and_upload_docker.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,9 @@ fi
1717
IMAGE_TAG=$2
1818
ACCOUNT=$1
1919
aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com
20-
DOCKER_BUILDKIT=1 docker build -t $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG .
20+
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile -t $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG .
2121
docker push $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG
22+
23+
#deepep
24+
DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.deepep -t $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG-deepep .
25+
docker push $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com/lightllm:$IMAGE_TAG-deepep
File renamed without changes.

docker/Dockerfile.deepep

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
FROM nvcr.io/nvidia/tritonserver:24.04-py3-min as base
2+
ARG PYTORCH_VERSION=2.6.0
3+
ARG PYTHON_VERSION=3.9
4+
ARG CUDA_VERSION=12.4
5+
ARG MAMBA_VERSION=23.1.0-1
6+
ARG TARGETPLATFORM
7+
8+
ENV PATH=/opt/conda/bin:$PATH \
9+
CONDA_PREFIX=/opt/conda
10+
11+
RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
12+
ca-certificates \
13+
libssl-dev \
14+
curl \
15+
g++ \
16+
make \
17+
git && \
18+
rm -rf /var/lib/apt/lists/*
19+
20+
RUN case ${TARGETPLATFORM} in \
21+
"linux/arm64") MAMBA_ARCH=aarch64 ;; \
22+
*) MAMBA_ARCH=x86_64 ;; \
23+
esac && \
24+
curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
25+
bash ~/mambaforge.sh -b -p /opt/conda && \
26+
rm ~/mambaforge.sh
27+
28+
RUN case ${TARGETPLATFORM} in \
29+
"linux/arm64") exit 1 ;; \
30+
*) /opt/conda/bin/conda update -y conda && \
31+
/opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
32+
esac && \
33+
/opt/conda/bin/conda clean -ya
34+
35+
36+
WORKDIR /root
37+
38+
COPY ./requirements.txt /lightllm/requirements.txt
39+
RUN pip install -r /lightllm/requirements.txt --no-cache-dir --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
40+
41+
RUN pip install --no-cache-dir https://github.com/ModelTC/flash-attn-3-build/releases/download/v2.7.4.post1/flash_attn-3.0.0b1-cp39-cp39-linux_x86_64.whl
42+
43+
RUN pip install --no-cache-dir nvidia-nccl-cu12==2.25.1 # for allreduce hang issues in multinode H100
44+
45+
RUN git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git
46+
RUN cd DeepGEMM && python setup.py install
47+
48+
WORKDIR /root
49+
RUN git clone https://github.com/deepseek-ai/DeepEP.git
50+
51+
# NVSHMEM
52+
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz
53+
RUN tar -xf nvshmem_src_3.2.5-1.txz \
54+
&& mv nvshmem_src nvshmem
55+
56+
WORKDIR /root/nvshmem
57+
RUN git apply /root/DeepEP/third-party/nvshmem.patch
58+
59+
WORKDIR /root/nvshmem
60+
ENV CUDA_HOME=/usr/local/cuda
61+
RUN NVSHMEM_SHMEM_SUPPORT=0 \
62+
NVSHMEM_UCX_SUPPORT=0 \
63+
NVSHMEM_USE_NCCL=0 \
64+
NVSHMEM_MPI_SUPPORT=0 \
65+
NVSHMEM_IBGDA_SUPPORT=1 \
66+
NVSHMEM_PMIX_SUPPORT=0 \
67+
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
68+
NVSHMEM_USE_GDRCOPY=1 \
69+
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 -DMLX5_lib=/usr/lib/x86_64-linux-gnu/libmlx5.so.1 \
70+
&& cd build \
71+
&& make install -j64
72+
73+
WORKDIR /root/DeepEP
74+
ENV NVSHMEM_DIR=/root/nvshmem/install
75+
RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
76+
77+
COPY . /lightllm
78+
RUN pip install -e /lightllm --no-cache-dir

0 commit comments

Comments
 (0)