</div>
## Latest News
* [2024/05] [Large AI Models Inference Speed Doubled, Colossal-Inference Open Source Release](https://hpc-ai.com/blog/colossal-inference)
* [2024/04] [Open-Sora Unveils Major Upgrade: Embracing Open Source with Single-Shot 16-Second Video Generation and 720p Resolution](https://hpc-ai.com/blog/open-soras-comprehensive-upgrade-unveiled-embracing-16-second-video-generation-and-720p-resolution-in-open-source)
* [2024/04] [Most cost-effective solutions for inference, fine-tuning and pretraining, tailored to LLaMA3 series](https://hpc-ai.com/blog/most-cost-effective-solutions-for-inference-fine-tuning-and-pretraining-tailored-to-llama3-series)
* [2024/03] [314 Billion Parameter Grok-1 Inference Accelerated by 3.8x, Efficient and Easy-to-Use PyTorch+HuggingFace Version is Here](https://hpc-ai.com/blog/314-billion-parameter-grok-1-inference-accelerated-by-3.8x-efficient-and-easy-to-use-pytorchhuggingface-version-is-here)
<li>
<a href="#Inference">Inference</a>
<ul>
<li><a href="#Colossal-Inference">Colossal-Inference: Large AI Models Inference Speed Doubled</a></li>
<li><a href="#Grok-1">Grok-1: 314B model of PyTorch + HuggingFace Inference</a></li>
<li><a href="#SwiftInfer">SwiftInfer: Breaks the Length Limit of LLM for Multi-Round Conversations with 46% Acceleration</a></li>
<li><a href="#GPT-3-Inference">GPT-3</a></li>
<li><a href="#OPT-Serving">OPT-175B Online Serving for Text Generation</a></li>
- [SwiftInfer](https://github.com/hpcaitech/SwiftInfer): Inference performance improved by 46%; an open-source solution that breaks the length limit of LLMs for multi-round conversations
- [BLOOM](https://github.com/hpcaitech/EnergonAI/tree/main/examples/bloom): Reduces hardware deployment costs of the 176-billion-parameter BLOOM model by more than 10 times
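SwiftInfer builds on the StreamingLLM attention-sink idea: the KV cache keeps the first few "sink" tokens plus a sliding window of recent tokens, so memory stays bounded no matter how long a multi-round conversation grows. The sketch below illustrates that eviction policy in plain Python; it is not SwiftInfer's actual API, and all names and budget values here are illustrative.

```python
# Illustrative sketch of attention-sink cache eviction (not SwiftInfer's API).
# The cache keeps the first n_sink entries (attention sinks) plus the most
# recent `window` entries, bounding memory for arbitrarily long conversations.

def evict_kv_cache(cache, n_sink=4, window=1020):
    """Return a bounded cache: sink entries + the most recent window entries."""
    if len(cache) <= n_sink + window:
        return cache  # still under budget, nothing to evict
    return cache[:n_sink] + cache[-window:]

# Usage: integers stand in for per-token key/value entries.
cache = list(range(2000))      # 2000 cached entries
cache = evict_kv_cache(cache)  # -> 4 sinks + last 1020 entries = 1024
```

The design point is that evicting the earliest tokens entirely degrades generation quality; retaining a handful of initial sink tokens preserves it while the window caps cost.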
<p align="right">(<a href="#top">back to top</a>)</p>