Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 29 additions & 24 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ name: Release

on:
push:
tags:
- v*
branches:
- main
pull_request:
branches:
- main

# Needed to create release and upload assets
permissions:
Expand Down Expand Up @@ -42,25 +45,27 @@ jobs:

wheel:
name: Build Wheel
runs-on: ${{ matrix.os }}
needs: release

runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: ['ubuntu-20.04']
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
pytorch-version: ['2.2.2', '2.3.1', '2.4.0', '2.5.1', '2.6.0']
cuda-version: ['12.4.0']
exclude:
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
# Pytorch < 2.5 does not support Python 3.13
- pytorch-version: '2.2.2'
python-version: '3.13'
- pytorch-version: '2.3.1'
python-version: '3.13'
- pytorch-version: '2.4.0'
python-version: '3.13'
# Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
# manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
os: [ ubuntu-22.04, ubuntu-22.04-arm ]
python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ]
pytorch-version: [ '2.4.0', '2.5.1', '2.6.0', '2.7.0' ]
cuda-version: [ '12.4.1' ]
# We need separate wheels: one built with the C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) and one without.
# Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
# Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
# when building without C++11 ABI and using it on nvcr images.
cxx11_abi: [ 'FALSE', 'TRUE' ]
exclude:
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
# Pytorch < 2.5 does not support Python 3.13
- pytorch-version: '2.4.0'
python-version: '3.13'

steps:
- name: Checkout
Expand Down Expand Up @@ -90,7 +95,7 @@ jobs:

- name: Install CUDA ${{ matrix.cuda-version }}
if: ${{ matrix.cuda-version != 'cpu' }}
uses: Jimver/cuda-toolkit@v0.2.19
uses: Jimver/cuda-toolkit@v0.2.23
id: cuda-toolkit
with:
cuda: ${{ matrix.cuda-version }}
Expand Down Expand Up @@ -142,10 +147,10 @@ jobs:
strategy:
fail-fast: false
matrix:
os: ['ubuntu-20.04']
python-version: ['3.10']
pytorch-version: ['2.3.0'] # Must be the most recent version that meets requirements-cuda.txt.
cuda-version: ['12.2.2']
os: ['ubuntu-latest']
python-version: ['3.12']
pytorch-version: ['2.7.0'] # Must be the most recent version that meets requirements-cuda.txt.
cuda-version: [ '12.4.1' ]

steps:
- name: Checkout
Expand All @@ -163,7 +168,7 @@ jobs:
bash -x .github/workflows/scripts/env.sh

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

Expand All @@ -175,7 +180,7 @@ jobs:

- name: Install CUDA ${{ matrix.cuda-version }}
if: ${{ matrix.cuda-version != 'cpu' }}
uses: Jimver/cuda-toolkit@v0.2.14
uses: Jimver/cuda-toolkit@v0.2.23
id: cuda-toolkit
with:
cuda: ${{ matrix.cuda-version }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ $python_executable -m pip install flash_attn triton
# Limit the number of parallel jobs to avoid OOM
export MAX_JOBS=1
# Make sure release wheels are built for the following architectures
export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX"
# Build
if [ "$3" = sdist ];
then
Expand Down
31 changes: 30 additions & 1 deletion .github/workflows/scripts/cuda-install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,37 @@ cuda_version=$(echo $1 | tr "." "-")
# Removes '-' and '.' ex: ubuntu-20.04 -> ubuntu2004
OS=$(echo $2 | tr -d ".\-")

ARCH=$(uname -m)
ARCH_TYPE=$ARCH

# Detect whether this is a Tegra device
if [[ "$ARCH" == "aarch64" ]]; then
if uname -a | grep -qi tegra; then
ARCH_TYPE="tegra-aarch64"
fi
fi

echo "Detected architecture: ${ARCH_TYPE}"

# Installs CUDA
wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-keyring_1.1-1_all.deb
if [[ "$ARCH_TYPE" == "tegra-aarch64" ]]; then
# Jetson (Tegra)
wget -nv \
https://developer.download.nvidia.com/compute/cuda/repos/${OS}/arm64/cuda-${DISTRO}.pin \
-O /etc/apt/preferences.d/cuda-repository-pin-600

elif [[ "$ARCH_TYPE" == "tegra-aarch64" ]]; then
# Jetson (Tegra)
wget -nv \
https://developer.download.nvidia.com/compute/cuda/repos/${OS}/arm64/cuda-${DISTRO}.pin \
-O /etc/apt/preferences.d/cuda-repository-pin-600
else
# ARM64 SBSA (Grace)
wget -nv \
https://developer.download.nvidia.com/compute/cuda/repos/${OS}/sbsa/cuda-${DISTRO}.pin \
-O /etc/apt/preferences.d/cuda-repository-pin-600
fi

sudo dpkg -i cuda-keyring_1.1-1_all.deb
rm cuda-keyring_1.1-1_all.deb
sudo apt -qq update
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/scripts/pytorch-install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ pip install typing-extensions==4.12.2
echo $MATRIX_CUDA_VERSION
echo $MATRIX_TORCH_VERSION
export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
minv = {'2.2': 118, '2.3': 118, '2.4': 118, '2.5': 118, '2.6': 118}[env['MATRIX_TORCH_VERSION']]; \
maxv = {'2.2': 121, '2.3': 121, '2.4': 124, '2.5': 124, '2.6': 124}[env['MATRIX_TORCH_VERSION']]; \
minv = {'2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118}[env['MATRIX_TORCH_VERSION']]; \
maxv = {'2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128}[env['MATRIX_TORCH_VERSION']]; \
print(max(min(int(env['MATRIX_CUDA_VERSION']), maxv), minv))" \
)
if [[ ${pytorch_version} == *"dev"* ]]; then
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
python-version: ["3.9", "3.10", "3.11"]
os: [ubuntu-latest, ubuntu-24.04-arm, macos-latest, windows-latest, windows-11-arm]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
exclude:
- os: macos-latest
python-version: '3.9'
Expand Down
31 changes: 28 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,15 @@ def get_minference_version() -> str:
return str(version)


def get_platform():
def get_arch():
"""
Returns the platform name as used in wheel filenames.
Returns the system arch for the current system.
"""
if sys.platform.startswith("linux"):
return f"linux_{platform.uname().machine}"
if platform.machine() == "x86_64":
return "x86_64"
if platform.machine() == "arm64" or platform.machine() == "aarch64":
return "aarch64"
elif sys.platform == "darwin":
mac_version = ".".join(platform.mac_ver()[0].split(".")[:2])
return f"macosx_{mac_version}_x86_64"
Expand All @@ -134,6 +137,28 @@ def get_platform():
raise ValueError("Unsupported platform: {}".format(sys.platform))


def get_system() -> str:
    """
    Map the running operating system to the system tag used in wheel filenames.

    Returns:
        "win" on Windows, "linux" on Linux, and "macos_<major>" on macOS,
        where <major> is the first dot-separated component of the version
        string reported by platform.mac_ver().

    Raises:
        ValueError: if platform.system() reports any other system.
    """
    system_name = platform.system()
    if system_name == "Linux":
        return "linux"
    if system_name == "Windows":
        return "win"
    if system_name == "Darwin":
        # Keep only the leading version component, e.g. "14.5.1" -> "14".
        major_version = platform.mac_ver()[0].split(".")[0]
        return f"macos_{major_version}"
    raise ValueError("Unsupported system: {}".format(system_name))


def get_platform() -> str:
    """
    Build the platform name as used in wheel filenames.

    The result is the value of get_system() followed by an underscore and
    the value of get_arch().
    """
    system_tag = get_system()
    arch_tag = get_arch()
    return f"{system_tag}_{arch_tag}"


def get_wheel_url():
# Determine the version numbers that will be used to determine the correct wheel
# We're using the CUDA version used to build torch, not the one currently installed
Expand Down