Commit

Hotfix(MInference): fix the pip setup (#8)
Co-authored-by: Yucheng Li <liyucheng09@gmail.com>
Co-authored-by: Chengruidong Zhang <chengzhang@microsoft.com>
3 people authored Jul 4, 2024
1 parent cfda78c commit 7e06bd3
Showing 7 changed files with 166 additions and 26 deletions.
76 changes: 67 additions & 9 deletions .github/workflows/release.yml
@@ -50,8 +50,40 @@ jobs:
matrix:
os: ['ubuntu-20.04']
python-version: ['3.8', '3.9', '3.10', '3.11']
pytorch-version: ['2.3.0'] # Must be the most recent version that meets requirements-cuda.txt.
cuda-version: ['11.8', '12.1']
torch-version: ['2.2.2', '2.3.0']
cuda-version: ['12.2.2']
exclude:
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
# Pytorch < 2.2 does not support Python 3.12
- pytorch-version: '1.12.1'
python-version: '3.12'
- pytorch-version: '1.13.1'
python-version: '3.12'
- pytorch-version: '2.0.1'
python-version: '3.12'
- pytorch-version: '2.1.2'
python-version: '3.12'
# Pytorch <= 1.12 does not support Python 3.11
- pytorch-version: '1.12.1'
python-version: '3.11'
# Pytorch >= 2.0 only supports Python >= 3.8
- pytorch-version: '2.0.1'
python-version: '3.7'
- pytorch-version: '2.1.2'
python-version: '3.7'
- pytorch-version: '2.2.2'
python-version: '3.7'
- pytorch-version: '2.3.0'
python-version: '3.7'
- pytorch-version: '2.4.0.dev20240407'
python-version: '3.7'
# Pytorch <= 2.0 only supports CUDA <= 11.8
- pytorch-version: '1.12.1'
cuda-version: '12.2.2'
- pytorch-version: '1.13.1'
cuda-version: '12.2.2'
- pytorch-version: '2.0.1'
cuda-version: '12.2.2'

steps:
- name: Checkout
@@ -80,8 +112,18 @@ jobs:
echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
- name: Install CUDA ${{ matrix.cuda-version }}
run: |
bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}
if: ${{ matrix.cuda-version != 'cpu' }}
uses: Jimver/cuda-toolkit@v0.2.14
id: cuda-toolkit
with:
cuda: ${{ matrix.cuda-version }}
linux-local-args: '["--toolkit"]'
# default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
# method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
method: 'network'
# We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
# not just nvcc
# sub-packages: '["nvcc"]'

- name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
run: |
@@ -94,7 +136,7 @@ jobs:
run: |
bash -x .github/workflows/scripts/build.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }} bdist_wheel
wheel_name=$(ls dist/*whl | xargs -n 1 basename)
asset_name=${wheel_name//"linux"/"manylinux1"}
asset_name=${wheel_name}
echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
echo "asset_name=${asset_name}" >> $GITHUB_ENV
@@ -114,7 +156,7 @@ jobs:
path: ./dist/${{ env.wheel_name }}
publish_package:
name: Publish Python 🐍 distribution 📦 to PyPI
needs: [release]
needs: [release, wheel]
runs-on: ${{ matrix.os }}
environment:
name: pypi
@@ -128,7 +170,7 @@ jobs:
os: ['ubuntu-20.04']
python-version: ['3.10']
pytorch-version: ['2.3.0'] # Must be the most recent version that meets requirements-cuda.txt.
cuda-version: ['12.1']
cuda-version: ['12.2.2']

steps:
- name: Checkout
@@ -150,9 +192,25 @@
with:
python-version: ${{ matrix.python-version }}

- name: Install CUDA ${{ matrix.cuda-version }}
- name: Set CUDA and PyTorch versions
run: |
bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}
echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.pytorch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
- name: Install CUDA ${{ matrix.cuda-version }}
if: ${{ matrix.cuda-version != 'cpu' }}
uses: Jimver/cuda-toolkit@v0.2.14
id: cuda-toolkit
with:
cuda: ${{ matrix.cuda-version }}
linux-local-args: '["--toolkit"]'
# default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
# method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
method: 'network'
# We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
# not just nvcc
# sub-packages: '["nvcc"]'

- name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
run: |
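For reference, the `awk` one-liners in the "Set CUDA and PyTorch versions" steps above reduce the matrix entries to the short identifiers used later for wheel naming. A minimal Python sketch of the same transformation, using a hypothetical matrix entry (the real values come from the CI matrix):

```python
# Illustrative only: mirrors the awk expressions in release.yml.
cuda, torch_ver, py = "12.2.2", "2.3.0", "3.10"  # hypothetical matrix entry

matrix_cuda_version = "".join(cuda.split(".")[:2])         # "122"
matrix_torch_version = ".".join(torch_ver.split(".")[:2])  # "2.3"
matrix_python_version = "".join(py.split(".")[:2])         # "310"

print(matrix_cuda_version, matrix_torch_version, matrix_python_version)
# 122 2.3 310
```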
5 changes: 4 additions & 1 deletion .github/workflows/scripts/build.sh
@@ -20,5 +20,8 @@ if [ "$3" = sdist ];
then
MINFERENCE_SKIP_CUDA_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist
else
MINFERENCE_LOCAL_VERSION=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION} MINFERENCE_FORCE_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist
MINFERENCE_FORCE_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist
tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
fi
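The `sed "s/-/+$tmpname-/2"` expression above rewrites only the second `-` in the wheel filename, splicing a local version tag (`cu<CUDA>torch<TORCH>`) onto the package version. A minimal Python sketch of the same rename, with a hypothetical wheel name:

```python
# Hypothetical wheel produced by `setup.py bdist_wheel`; the tag mirrors
# cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION} from build.sh.
wheel = "minference-0.1.2-cp310-cp310-linux_x86_64.whl"
tag = "cu122torch2.3"

# Equivalent to sed "s/-/+$tmpname-/2": only the 2nd "-" (after the version) changes.
name, version, rest = wheel.split("-", 2)
renamed = f"{name}-{version}+{tag}-{rest}"

print(renamed)
# minference-0.1.2+cu122torch2.3-cp310-cp310-linux_x86_64.whl
```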
28 changes: 23 additions & 5 deletions .github/workflows/scripts/pytorch-install.sh
@@ -4,11 +4,29 @@ python_executable=python$1
pytorch_version=$2
cuda_version=$3

# Install torch
$python_executable -m pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses setuptools && conda clean -ya
$python_executable -m pip install torch==${pytorch_version}+cu${cuda_version//./} --extra-index-url https://download.pytorch.org/whl/cu${cuda_version//./}

# Print version information
pip install --upgrade pip
# If we don't install before installing Pytorch, we get error for torch 2.0.1
# ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none)
pip install lit
# For some reason torch 2.2.0 on python 3.12 errors saying no setuptools
pip install setuptools
# We want to figure out the CUDA version to download pytorch
# e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
# This code is ugly, maybe there's a better way to do this.
echo $MATRIX_CUDA_VERSION
echo $MATRIX_TORCH_VERSION
export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 118, '2.2': 118, '2.3': 118, '2.4': 118}[env['MATRIX_TORCH_VERSION']]; \
maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121, '2.2': 121, '2.3': 121, '2.4': 121}[env['MATRIX_TORCH_VERSION']]; \
print(max(min(int(env['MATRIX_CUDA_VERSION']), maxv), minv))" \
)
if [[ ${pytorch_version} == *"dev"* ]]; then
pip install --no-cache-dir --pre torch==${pytorch_version} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
else
pip install --no-cache-dir torch==${pytorch_version} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
fi
nvcc --version
$python_executable --version
$python_executable -c "import torch; print('PyTorch:', torch.__version__)"
$python_executable -c "import torch; print('CUDA:', torch.version.cuda)"
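The `TORCH_CUDA_VERSION` expression above clamps the system CUDA version into the range of CUDA builds that PyTorch publishes wheels for. A small worked example, assuming the hypothetical env values MATRIX_TORCH_VERSION=2.3 and MATRIX_CUDA_VERSION=122 (CUDA 12.2):

```python
# Same lookup tables as pytorch-install.sh; the inputs are assumed env values.
minv = {"1.12": 113, "1.13": 116, "2.0": 117, "2.1": 118, "2.2": 118, "2.3": 118, "2.4": 118}
maxv = {"1.12": 116, "1.13": 117, "2.0": 118, "2.1": 121, "2.2": 121, "2.3": 121, "2.4": 121}

matrix_torch_version = "2.3"   # MATRIX_TORCH_VERSION
matrix_cuda_version = 122      # MATRIX_CUDA_VERSION (CUDA 12.2)

torch_cuda_version = max(min(matrix_cuda_version, maxv[matrix_torch_version]),
                         minv[matrix_torch_version])
print(torch_cuda_version)
# 121 -> torch wheels are pulled from https://download.pytorch.org/whl/cu121
```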
2 changes: 2 additions & 0 deletions MANIFEST.in
@@ -0,0 +1,2 @@
recursive-include csrc *.cu
recursive-include csrc *.cpp
4 changes: 2 additions & 2 deletions README.md
@@ -8,7 +8,7 @@

<p align="center">
| <a href="https://aka.ms/MInference"><b>Project Page</b></a> |
<a href="https://export.arxiv.org/pdf/2407.02490"><b>Paper</b></a> |
<a href="https://arxiv.org/abs/2407.02490"><b>Paper</b></a> |
<a href="https://huggingface.co/spaces/microsoft/MInference"><b>HF Demo</b></a> |
</p>

@@ -103,7 +103,7 @@ attn_output = block_sparse_attention(q, k, v, topk)
attn_output = streaming_forward(q, k, v, init_num, local_window_num)
```

For more details, please refer to our [Examples](https://github.com/microsoft/MInference/tree/main/examples) and [Experiments](https://github.com/microsoft/MInference/tree/main/experiments).
For more details, please refer to our [Examples](https://github.com/microsoft/MInference/tree/main/examples) and [Experiments](https://github.com/microsoft/MInference/tree/main/experiments). You can find more information about the dynamic compiler PIT in this [paper](https://dl.acm.org/doi/10.1145/3600006.3613139) and on [GitHub](https://github.com/microsoft/SparTA/tree/pit_artifact).

## FAQ

2 changes: 1 addition & 1 deletion minference/version.py
@@ -5,7 +5,7 @@
_MINOR = "1"
# On master and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "1"
_PATCH = "2"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""
75 changes: 67 additions & 8 deletions setup.py
@@ -2,7 +2,10 @@
# Licensed under The MIT License [see LICENSE for details]

import os
import platform
import subprocess
import sys
import urllib

import torch
from packaging.version import Version, parse
@@ -61,8 +64,6 @@
# SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation
FORCE_BUILD = os.getenv("MINFERENCE_FORCE_BUILD", "FALSE") == "TRUE"
SKIP_CUDA_BUILD = os.getenv("MINFERENCE_SKIP_CUDA_BUILD", "FALSE") == "TRUE"
# For CI, we want the option to build with C++11 ABI since the nvcr images use C++11 ABI
FORCE_CXX11_ABI = os.getenv("MINFERENCE_FORCE_CXX11_ABI", "FALSE") == "TRUE"


def check_if_cuda_home_none(global_option: str) -> None:
@@ -96,11 +97,6 @@ def check_if_cuda_home_none(global_option: str) -> None:

check_if_cuda_home_none("minference")

# HACK: The compiler flag -D_GLIBCXX_USE_CXX11_ABI is set to be the same as
# torch._C._GLIBCXX_USE_CXX11_ABI
# https://github.com/pytorch/pytorch/blob/8472c24e3b5b60150096486616d98b7bea01500b/torch/utils/cpp_extension.py#L920
if FORCE_CXX11_ABI:
torch._C._GLIBCXX_USE_CXX11_ABI = True
ext_modules.append(
CUDAExtension(
name="minference.cuda",
@@ -123,6 +119,47 @@ def get_minference_version() -> str:
return str(version)


def get_platform():
"""
Returns the platform name as used in wheel filenames.
"""
if sys.platform.startswith("linux"):
return f"linux_{platform.uname().machine}"
elif sys.platform == "darwin":
mac_version = ".".join(platform.mac_ver()[0].split(".")[:2])
return f"macosx_{mac_version}_x86_64"
elif sys.platform == "win32":
return "win_amd64"
else:
raise ValueError("Unsupported platform: {}".format(sys.platform))


def get_wheel_url():
# Determine the version numbers that will be used to determine the correct wheel
# We're using the CUDA version used to build torch, not the one currently installed
# _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
torch_cuda_version = parse(torch.version.cuda)
torch_version_raw = parse(torch.__version__)
# For CUDA 11, we only compile for CUDA 11.8, and for CUDA 12 we only compile for CUDA 12.2
# to save CI time. Minor versions should be compatible.
torch_cuda_version = (
parse("11.8") if torch_cuda_version.major == 11 else parse("12.2")
)
python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
platform_name = get_platform()
minference_version = get_minference_version()
# cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
cuda_version = f"{torch_cuda_version.major}{torch_cuda_version.minor}"
torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}"

# Determine wheel URL based on CUDA version, torch version, python version and OS
wheel_filename = f"{PACKAGE_NAME}-{minference_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl"
wheel_url = BASE_WHEEL_URL.format(
tag_name=f"v{minference_version}", wheel_name=wheel_filename
)
return wheel_url, wheel_filename


class CachedWheelsCommand(_bdist_wheel):
"""
The CachedWheelsCommand plugs into the default bdist wheel, which is ran by pip when it cannot
@@ -132,7 +169,29 @@ class CachedWheelsCommand(_bdist_wheel):
"""

def run(self):
return super().run()
if True:
return super().run()
wheel_url, wheel_filename = get_wheel_url()
print("Guessing wheel URL: ", wheel_url)
try:
urllib.request.urlretrieve(wheel_url, wheel_filename)

# Make the archive
# Lifted from the root wheel processing command
# https://github.com/pypa/wheel/blob/cf71108ff9f6ffc36978069acb28824b44ae028e/src/wheel/bdist_wheel.py#LL381C9-L381C85
if not os.path.exists(self.dist_dir):
os.makedirs(self.dist_dir)

impl_tag, abi_tag, plat_tag = self.get_tag()
archive_basename = f"{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}"

wheel_path = os.path.join(self.dist_dir, archive_basename + ".whl")
print("Raw wheel path", wheel_path)
os.rename(wheel_filename, wheel_path)
except (urllib.error.HTTPError, urllib.error.URLError):
print("Precompiled wheel not found. Building from source...")
# If the wheel could not be downloaded, build from source
super().run()


class NinjaBuildExtension(BuildExtension):

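Taken together with the build.sh change, `get_wheel_url()` reconstructs the same `+cu<CUDA>torch<TORCH>` naming convention from the locally installed torch, so a matching prebuilt wheel can be looked up on the release page before falling back to a source build (as committed here, the early `return super().run()` in `CachedWheelsCommand.run()` still builds locally). A sketch of the filename it assembles, with hypothetical version values; `BASE_WHEEL_URL` itself is not shown in this diff:

```python
# Hypothetical inputs; in setup.py these come from torch.__version__,
# torch.version.cuda, sys.version_info and platform detection.
package_name = "minference"
minference_version = "0.1.2"
cuda_version = "122"        # torch built against CUDA 12.x -> pinned to 12.2
torch_version = "2.3"
python_version = "cp310"
platform_name = "linux_x86_64"

wheel_filename = (
    f"{package_name}-{minference_version}+cu{cuda_version}torch{torch_version}"
    f"-{python_version}-{python_version}-{platform_name}.whl"
)
print(wheel_filename)
# minference-0.1.2+cu122torch2.3-cp310-cp310-linux_x86_64.whl
```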