From 7e06bd300408dd1dd23b226bc88d8f387172f58a Mon Sep 17 00:00:00 2001
From: Huiqiang Jiang <hjiang@microsoft.com>
Date: Thu, 4 Jul 2024 13:21:16 +0800
Subject: [PATCH] Hotfix(MInference): fix the pip setup (#8)

Co-authored-by: Yucheng Li <liyucheng09@gmail.com>
Co-authored-by: Chengruidong Zhang <chengzhang@microsoft.com>
---
 .github/workflows/release.yml                | 76 +++++++++++++++++---
 .github/workflows/scripts/build.sh           |  5 +-
 .github/workflows/scripts/pytorch-install.sh | 28 ++++++--
 MANIFEST.in                                  |  2 +
 README.md                                    |  4 +-
 minference/version.py                        |  2 +-
 setup.py                                     | 75 ++++++++++++++++---
 7 files changed, 166 insertions(+), 26 deletions(-)
 create mode 100644 MANIFEST.in

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 89156f8..099b592 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -50,8 +50,40 @@ jobs:
       matrix:
           os: ['ubuntu-20.04']
           python-version: ['3.8', '3.9', '3.10', '3.11']
-          pytorch-version: ['2.3.0']  # Must be the most recent version that meets requirements-cuda.txt.
-          cuda-version: ['11.8', '12.1']
+          pytorch-version: ['2.2.2', '2.3.0']  # referenced as matrix.pytorch-version by the exclude list and the steps
+          cuda-version: ['12.2.2']
+          exclude:
+            # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
+            # Pytorch < 2.2 does not support Python 3.12
+            - pytorch-version: '1.12.1'
+              python-version: '3.12'
+            - pytorch-version: '1.13.1'
+              python-version: '3.12'
+            - pytorch-version: '2.0.1'
+              python-version: '3.12'
+            - pytorch-version: '2.1.2'
+              python-version: '3.12'
+            # Pytorch <= 1.12 does not support Python 3.11
+            - pytorch-version: '1.12.1'
+              python-version: '3.11'
+            # Pytorch >= 2.0 only supports Python >= 3.8
+            - pytorch-version: '2.0.1'
+              python-version: '3.7'
+            - pytorch-version: '2.1.2'
+              python-version: '3.7'
+            - pytorch-version: '2.2.2'
+              python-version: '3.7'
+            - pytorch-version: '2.3.0'
+              python-version: '3.7'
+            - pytorch-version: '2.4.0.dev20240407'
+              python-version: '3.7'
+            # Pytorch <= 2.0 only supports CUDA <= 11.8
+            - pytorch-version: '1.12.1'
+              cuda-version: '12.2.2'
+            - pytorch-version: '1.13.1'
+              cuda-version: '12.2.2'
+            - pytorch-version: '2.0.1'
+              cuda-version: '12.2.2'
 
     steps:
       - name: Checkout
@@ -80,8 +112,18 @@ jobs:
           echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
 
       - name: Install CUDA ${{ matrix.cuda-version }}
-        run: |
-          bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}
+        if: ${{ matrix.cuda-version != 'cpu' }}
+        uses: Jimver/cuda-toolkit@v0.2.14
+        id: cuda-toolkit
+        with:
+          cuda: ${{ matrix.cuda-version }}
+          linux-local-args: '["--toolkit"]'
+          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
+          # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
+          method: 'network'
+          # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
+          # not just nvcc
+          # sub-packages: '["nvcc"]'
 
       - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
         run: |
@@ -94,7 +136,7 @@ jobs:
         run: |
           bash -x .github/workflows/scripts/build.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }} bdist_wheel
           wheel_name=$(ls dist/*whl | xargs -n 1 basename)
-          asset_name=${wheel_name//"linux"/"manylinux1"}
+          asset_name=${wheel_name}
           echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
           echo "asset_name=${asset_name}" >> $GITHUB_ENV
 
@@ -114,7 +156,7 @@ jobs:
           path: ./dist/${{ env.wheel_name }}
   publish_package:
     name: Publish Python 🐍 distribution 📦 to PyPI
-    needs: [release]
+    needs: [release, wheel]
     runs-on: ${{ matrix.os }}
     environment:
       name: pypi
@@ -128,7 +170,7 @@ jobs:
           os: ['ubuntu-20.04']
           python-version: ['3.10']
           pytorch-version: ['2.3.0']  # Must be the most recent version that meets requirements-cuda.txt.
-          cuda-version: ['12.1']
+          cuda-version: ['12.2.2']
 
     steps:
       - name: Checkout
@@ -150,9 +192,25 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
 
-      - name: Install CUDA ${{ matrix.cuda-version }}
+      - name: Set CUDA and PyTorch versions
         run: |
-          bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}
+          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
+          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.pytorch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
+          echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
+
+      - name: Install CUDA ${{ matrix.cuda-version }}
+        if: ${{ matrix.cuda-version != 'cpu' }}
+        uses: Jimver/cuda-toolkit@v0.2.14
+        id: cuda-toolkit
+        with:
+          cuda: ${{ matrix.cuda-version }}
+          linux-local-args: '["--toolkit"]'
+          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
+          # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
+          method: 'network'
+          # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
+          # not just nvcc
+          # sub-packages: '["nvcc"]'
 
       - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
         run: |
diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh
index c491b92..f830091 100644
--- a/.github/workflows/scripts/build.sh
+++ b/.github/workflows/scripts/build.sh
@@ -20,5 +20,8 @@ if [ "$3" = sdist ];
 then
 MINFERENCE_SKIP_CUDA_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist
 else
-MINFERENCE_LOCAL_VERSION=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION} MINFERENCE_FORCE_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist
+MINFERENCE_FORCE_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist
+tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}
+wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
+ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
 fi
diff --git a/.github/workflows/scripts/pytorch-install.sh b/.github/workflows/scripts/pytorch-install.sh
index dfc1851..6e61ba8 100644
--- a/.github/workflows/scripts/pytorch-install.sh
+++ b/.github/workflows/scripts/pytorch-install.sh
@@ -4,11 +4,29 @@ python_executable=python$1
 pytorch_version=$2
 cuda_version=$3
 
-# Install torch
-$python_executable -m pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses setuptools && conda clean -ya
-$python_executable -m pip install torch==${pytorch_version}+cu${cuda_version//./} --extra-index-url https://download.pytorch.org/whl/cu${cuda_version//./}
-
-# Print version information
+$python_executable -m pip install --upgrade pip
+# If we don't install lit before installing PyTorch, we get an error for torch 2.0.1:
+# ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none)
+$python_executable -m pip install lit
+# For some reason torch 2.2.0 on python 3.12 errors saying no setuptools
+$python_executable -m pip install setuptools
+# We want to figure out the CUDA version to download pytorch
+# e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
+# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
+# This code is ugly, maybe there's a better way to do this.
+echo $MATRIX_CUDA_VERSION
+echo $MATRIX_TORCH_VERSION
+export TORCH_CUDA_VERSION=$($python_executable -c "from os import environ as env; \
+minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 118, '2.2': 118, '2.3': 118, '2.4': 118}[env['MATRIX_TORCH_VERSION']]; \
+maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121, '2.2': 121, '2.3': 121, '2.4': 121}[env['MATRIX_TORCH_VERSION']]; \
+print(max(min(int(env['MATRIX_CUDA_VERSION']), maxv), minv))" \
+)
+if [[ ${pytorch_version} == *"dev"* ]]; then
+$python_executable -m pip install --no-cache-dir --pre torch==${pytorch_version} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
+else
+$python_executable -m pip install --no-cache-dir torch==${pytorch_version} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
+fi
+nvcc --version
 $python_executable --version
 $python_executable -c "import torch; print('PyTorch:', torch.__version__)"
 $python_executable -c "import torch; print('CUDA:', torch.version.cuda)"
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..eb5ecfc
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
+recursive-include csrc *.cu
+recursive-include csrc *.cpp
diff --git a/README.md b/README.md
index ed3233b..bf6c99d 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 
 <p align="center">
     | <a href="https://aka.ms/MInference"><b>Project Page</b></a> |
-    <a href="https://export.arxiv.org/pdf/2407.02490"><b>Paper</b></a> |
+    <a href="https://arxiv.org/abs/2407.02490"><b>Paper</b></a> |
     <a href="https://huggingface.co/spaces/microsoft/MInference"><b>HF Demo</b></a> |
 </p>
 
@@ -103,7 +103,7 @@ attn_output = block_sparse_attention(q, k, v, topk)
 attn_output = streaming_forward(q, k, v, init_num, local_window_num)
 ```
 
-For more details, please refer to our [Examples](https://github.com/microsoft/MInference/tree/main/examples) and [Experiments](https://github.com/microsoft/MInference/tree/main/experiments).
+For more details, please refer to our [Examples](https://github.com/microsoft/MInference/tree/main/examples) and [Experiments](https://github.com/microsoft/MInference/tree/main/experiments). You can find more information about the dynamic compiler PIT in this [paper](https://dl.acm.org/doi/10.1145/3600006.3613139) and on [GitHub](https://github.com/microsoft/SparTA/tree/pit_artifact).
 
 ## FAQ
 
diff --git a/minference/version.py b/minference/version.py
index d8a5922..d7adf2a 100644
--- a/minference/version.py
+++ b/minference/version.py
@@ -5,7 +5,7 @@
 _MINOR = "1"
 # On master and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "1"
+_PATCH = "2"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = ""
diff --git a/setup.py b/setup.py
index a312a21..2ee4f73 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,10 @@
 # Licensed under The MIT License [see LICENSE for details]
 
 import os
+import platform
 import subprocess
+import sys
+import urllib
 
 import torch
 from packaging.version import Version, parse
@@ -61,8 +64,6 @@
 # SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation
 FORCE_BUILD = os.getenv("MINFERENCE_FORCE_BUILD", "FALSE") == "TRUE"
 SKIP_CUDA_BUILD = os.getenv("MINFERENCE_SKIP_CUDA_BUILD", "FALSE") == "TRUE"
-# For CI, we want the option to build with C++11 ABI since the nvcr images use C++11 ABI
-FORCE_CXX11_ABI = os.getenv("MINFERENCE_FORCE_CXX11_ABI", "FALSE") == "TRUE"
 
 
 def check_if_cuda_home_none(global_option: str) -> None:
@@ -96,11 +97,6 @@ def check_if_cuda_home_none(global_option: str) -> None:
 
     check_if_cuda_home_none("minference")
 
-    # HACK: The compiler flag -D_GLIBCXX_USE_CXX11_ABI is set to be the same as
-    # torch._C._GLIBCXX_USE_CXX11_ABI
-    # https://github.com/pytorch/pytorch/blob/8472c24e3b5b60150096486616d98b7bea01500b/torch/utils/cpp_extension.py#L920
-    if FORCE_CXX11_ABI:
-        torch._C._GLIBCXX_USE_CXX11_ABI = True
     ext_modules.append(
         CUDAExtension(
             name="minference.cuda",
@@ -123,6 +119,47 @@ def get_minference_version() -> str:
         return str(version)
 
 
+def get_platform():
+    """
+    Return the wheel-filename platform tag; raise ValueError for an unknown sys.platform.
+    """
+    if sys.platform.startswith("linux"):
+        return f"linux_{platform.uname().machine}"
+    if sys.platform == "darwin":
+        # Only the first two components of the macOS version go into the tag.
+        mac_version = ".".join(platform.mac_ver()[0].split(".")[:2])
+        return f"macosx_{mac_version}_x86_64"
+    if sys.platform == "win32":
+        return "win_amd64"
+    raise ValueError("Unsupported platform: {}".format(sys.platform))
+
+
+def get_wheel_url():
+    """Guess the download URL and filename of the prebuilt wheel for this environment.
+
+    Returns a ``(wheel_url, wheel_filename)`` tuple.
+    """
+    # Use the CUDA version torch was built with, not the one currently installed.
+    built_cuda = parse(torch.version.cuda)
+    # For CUDA 11, we only compile for CUDA 11.8, and for CUDA 12 we only compile for CUDA 12.2
+    # to save CI time. Minor versions should be compatible.
+    wheel_cuda = parse("11.8") if built_cuda.major == 11 else parse("12.2")
+    cuda_version = f"{wheel_cuda.major}{wheel_cuda.minor}"
+
+    torch_release = parse(torch.__version__)
+    torch_version = f"{torch_release.major}.{torch_release.minor}"
+
+    python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
+    platform_name = get_platform()
+    minference_version = get_minference_version()
+
+    # Determine wheel URL based on CUDA version, torch version, python version and OS
+    wheel_filename = f"{PACKAGE_NAME}-{minference_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl"
+    release_tag = f"v{minference_version}"
+    wheel_url = BASE_WHEEL_URL.format(tag_name=release_tag, wheel_name=wheel_filename)
+    return wheel_url, wheel_filename
+
+
 class CachedWheelsCommand(_bdist_wheel):
     """
     The CachedWheelsCommand plugs into the default bdist wheel, which is ran by pip when it cannot
@@ -132,7 +169,29 @@ class CachedWheelsCommand(_bdist_wheel):
     """
 
     def run(self):
-        return super().run()
+        """Build the wheel from source (the prebuilt-wheel download is disabled)."""
+        # NOTE(review): this gate is unconditional, so everything after it is unreachable.
+        if True:
+            return super().run()
+        import urllib.error  # a bare `import urllib` does not load the submodules used here
+        import urllib.request
+        wheel_url, wheel_filename = get_wheel_url()
+        print("Guessing wheel URL: ", wheel_url)
+        try:
+            urllib.request.urlretrieve(wheel_url, wheel_filename)
+        except (urllib.error.HTTPError, urllib.error.URLError):
+            # If the wheel could not be downloaded, build from source
+            print("Precompiled wheel not found. Building from source...")
+            return super().run()
+
+        # Make the archive (lifted from the root wheel processing command)
+        # https://github.com/pypa/wheel/blob/cf71108ff9f6ffc36978069acb28824b44ae028e/src/wheel/bdist_wheel.py#LL381C9-L381C85
+        os.makedirs(self.dist_dir, exist_ok=True)
+        impl_tag, abi_tag, plat_tag = self.get_tag()
+        archive_basename = f"{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}"
+        wheel_path = os.path.join(self.dist_dir, archive_basename + ".whl")
+        print("Raw wheel path", wheel_path)
+        os.rename(wheel_filename, wheel_path)
 
 
 class NinjaBuildExtension(BuildExtension):