# .github/workflows/release.yml

# Display name of this workflow in the Actions UI.
name: 'Release'

# Every `run:` step uses a login bash shell (-l) that stops at the first
# failing command (-e).
defaults:
  run:
    shell: 'bash -le {0}'
# Triggers: a daily schedule, published releases, API dispatch and manual runs.
on:
  schedule:
    # Once a day at 20:00 (GitHub evaluates cron expressions in UTC).
    - cron: '0 20 * * *'
  release:
    types:
      - published
  repository_dispatch:
  workflow_dispatch:
    inputs:
      # Empty repo/ref fall back to the triggering repository and ref
      # (see the workflow-level `env` block).
      repo:
        description: 'GitHub repo {owner}/{repo}'
        required: false
        default: ''
      ref:
        description: 'GitHub ref: Branch, Tag or Commit SHA'
        required: false
        default: ''
      # Either "<cuda> <torch> <python>" (space separated) or a full image tag;
      # empty means "ask the build server for its list".
      target:
        description: 'CUDA Torch Python version separated by space, check http://10.0.14.199/gpu/runner/docker to get all supported combinations'
        required: false
        default: ''
      # JSON object whose `size` field caps the wheel-build matrix parallelism.
      max-parallel:
        description: 'max parallel jobs'
        required: false
        default: '{"size": 6}'
      # Boolean inputs take real boolean defaults rather than the string 'false'.
      upload_release:
        description: 'upload to release (it only works with a tag ref)'
        type: boolean
        required: false
        default: false
      upload_pypi:
        description: 'upload to PyPI'
        type: boolean
        required: false
        default: false

# Environment shared by every job and step of this workflow.
env:
  # What to build: manual-dispatch inputs win, otherwise the triggering
  # repository and ref are used.
  repo: ${{ github.event.inputs.repo || github.repository }}
  ref: ${{ github.event.inputs.ref || github.ref }}

  # Address of the internal build server; the jobs below use it for the
  # image registry (port 5000), the runner list, init scripts and wheel uploads.
  ZEN4_SERVER: '10.0.14.199'

  # Build-related switches. NOTE(review): presumably read by setup.py / the
  # CUDA toolchain - this file only sets them; confirm against the build scripts.
  CI: '1'
  CUDA_RELEASE: '1'
  GPTQMODEL_FORCE_BUILD: '1'
  MAX_JOBS: '4'
  TORCH_CUDA_ARCH_LIST: '6.0 6.1 6.2 7.0 7.5 8.0 8.6 8.9 9.0'
  CUDA_DEVICE_ORDER: 'PCI_BUS_ID'

# One release run per ref: starting a new run for the same ref cancels the
# run that is still in progress.
concurrency:
  cancel-in-progress: true
  group: '${{ github.event.inputs.ref || github.ref }}-workflow-release'

jobs:
  # Resolves the build-server address and the JSON list of compiler image tags
  # that the release-zen4 matrix fans out over.
  #
  # Outputs:
  #   ip        - address of the build server (taken from ZEN4_SERVER)
  #   zen4_list - JSON array of image tags to build wheels for
  check-vm:
    runs-on: self-hosted
    container:
      image: modelcloud/gptqmodel:alpine-ci-v1
    outputs:
      ip: ${{ steps.get_ip.outputs.ip }}
      zen4_list: ${{ steps.assign.outputs.zen4_list }}
    steps:
      - name: Print env
        # Values are handed to the script as environment variables instead of
        # being spliced into its text, so quotes inside them (for example the
        # JSON in max-parallel) are printed verbatim and cannot alter the script.
        # `repo` and `ref` are already exported by the workflow-level `env`.
        env:
          EVENT_NAME: ${{ github.event_name }}
          MAX_PARALLEL: ${{ inputs.max-parallel }}
          UPLOAD_RELEASE: ${{ inputs.upload_release }}
          UPLOAD_PYPI: ${{ inputs.upload_pypi }}
        run: |
          echo "event name: ${EVENT_NAME}"
          echo "repo: ${repo}"
          echo "ref: ${ref}"
          echo "max-parallel: ${MAX_PARALLEL}"
          echo "upload_release: ${UPLOAD_RELEASE}"
          echo "upload_pypi: ${UPLOAD_PYPI}"

      - name: Select server
        id: get_ip
        run: |
          echo "ip=${ZEN4_SERVER}" >> "$GITHUB_OUTPUT"
          # Also expose the address to the later steps of this job.
          echo "GPU_IP=${ZEN4_SERVER}" >> "$GITHUB_ENV"
          # Log the selected address; no shell variable named "ip" exists here.
          echo "ip: ${ZEN4_SERVER}"

      - name: Assign tasks
        id: assign
        env:
          # User-supplied text: passed through the environment so it is never
          # interpreted as part of the script.
          TARGET: ${{ github.event.inputs.target }}
        run: |
          input="${TARGET}"

          if [[ -z "$input" ]]; then
            # No explicit target: ask the build server which images exist.
            server_lists=$(curl -s "http://${GPU_IP}/gpu/runner/docker?json=1")
            echo "lists=$server_lists"

            # The response is split on '+' into two lists; only the first one
            # is published as an output below.
            IFS=$'+' read -r list_1 list_2 <<< "$server_lists"

            echo "list 1: $list_1"
            echo "list 2: $list_2"
          else
            echo "inputed target is: $input"

            if [[ "$input" =~ \  ]]; then
              # "<cuda> <torch> <python>" -> compiler image tag.
              echo "extracting cuda torch and python"
              read -r cuda torch py <<< "$input"
              task=compiler_cuda$cuda-torch$torch-python$py
              list_1="[\"$task\"]"
            else
              # A value without spaces is taken as a ready-made image tag.
              echo "use inputed image name"
              list_1="[\"$input\"]"
            fi
            list_2="[]"

            echo "task list: $list_1"
          fi

          echo "zen4_list=$list_1" >> "$GITHUB_OUTPUT"

  # Builds one binary wheel per compiler image tag reported by check-vm, then
  # publishes it to the build server, to the workflow artifacts and, when
  # requested, to the GitHub release.
  release-zen4:
    strategy:
      fail-fast: false
      matrix:
        tag: ${{ fromJSON(needs.check-vm.outputs.zen4_list) }}
      # `inputs` is empty for schedule/release/repository_dispatch events and
      # fromJson cannot parse an empty string, so substitute the default JSON
      # before parsing. (Written without a space after the colon so the value
      # stays a valid plain YAML scalar.)
      max-parallel: ${{ fromJson(inputs.max-parallel || '{"size":6}').size || 6 }}
    runs-on: [ self-hosted, zen4 ]
    needs:
      - check-vm
      - release-source
    # `!cancelled()` replaces the implicit success() check, so the wheels are
    # still built when release-source did not succeed.
    if: needs.check-vm.outputs.zen4_list != '' && !cancelled()
    container:
      image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:${{ matrix.tag }}
    timeout-minutes: 70
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}

      - name: Print Env
        run: |
          echo "== pyenv =="
          pyenv versions
          echo "== python =="
          python --version
          echo "== nvcc =="
          #nvcc --version
          echo "== torch =="
          pip show torch

      - name: Install requirements
        env:
          # The tag may originate from user input; pass it through the
          # environment rather than splicing it into the script.
          IMAGE_TAG: ${{ matrix.tag }}
        run: |
          # Pull the version numbers out of the image tag, e.g.
          # compiler_cuda124-torch2.4.1-python311 -> 124 / 2.4.1 / 311.
          cuda_version=$(echo "$IMAGE_TAG" | grep -oP 'cuda\K[0-9.]+')
          torch_version=$(echo "$IMAGE_TAG" | grep -oP 'torch\K[0-9.]+')
          python_version=$(echo "$IMAGE_TAG" | grep -oP 'python\K[0-9.]+')
          bash -c "$(curl -L http://${ZEN4_SERVER}/scripts/compiler/init_env.sh)" @ $cuda_version $torch_version $python_version

      - name: Compile
        run: python setup.py bdist_wheel

      - name: Test install
        run: |
          ls -ahl dist
          # Newest wheel in dist/; its file name is shared with the later steps.
          whl=$(ls -t dist/*.whl | head -n 1 | xargs basename)
          echo "WHL_NAME=$whl" >> "$GITHUB_ENV"

          twine check dist/$whl
          uv pip install dist/$whl

      - name: Upload wheel
        # Best effort: a failed upload to the build server must not fail the job.
        continue-on-error: true
        env:
          RUN_ID: ${{ github.run_id }}
          UPLOAD_HOST: ${{ needs.check-vm.outputs.ip }}
        run: |
          # sha256sum prints "<digest>  <path>" and the whole line is sent.
          # NOTE(review): confirm the server expects that format.
          sha256=$(sha256sum "dist/${WHL_NAME}")
          response=$(curl -s -F "runid=${RUN_ID}" -F "repo=${repo}" -F "ref=${ref}" -F "sha256=$sha256" -F "file=@dist/${WHL_NAME}" "http://${UPLOAD_HOST}/gpu/whl/upload")
          # A response of 0 is treated as success.
          if [ "$response" -eq 0 ]; then
            echo "UPLOADED=1" >> "$GITHUB_ENV"
          fi

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        # The artifact copy is only optional when the server upload succeeded.
        continue-on-error: ${{ env.UPLOADED == '1' }}
        with:
          name: ${{ env.WHL_NAME }}
          path: dist/${{ env.WHL_NAME }}

      - name: Upload binaries to release
        uses: svenstaro/upload-release-action@v2
        if: (github.event_name == 'release' || github.event.inputs.upload_release == 'true') && !cancelled()
        with:
          repo_name: ${{ env.repo }}
          tag: ${{ env.ref }}
          file: dist/${{ env.WHL_NAME }}
          file_glob: true
          overwrite: true

  # Builds the source distribution and publishes it to the build server, to
  # the workflow artifacts and, when requested, to the GitHub release and PyPI.
  release-source:
    strategy:
      fail-fast: false
    runs-on: self-hosted
    needs: check-vm
    container:
      image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:compiler_cuda124-torch2.4.1-python311
    env:
      # Overrides the workflow-level CUDA_RELEASE=1 for this job.
      CUDA_RELEASE: 0
    steps:
      - name: Checkout Codes
        uses: actions/checkout@v4
        with:
          repository: ${{ env.repo }}
          ref: ${{ env.ref }}

      - name: Install requirements
        env:
          INDEX_HOST: ${{ needs.check-vm.outputs.ip }}
        run: uv pip install build setuptools -U -i "http://${INDEX_HOST}/simple/" --trusted-host "${INDEX_HOST}"

      - name: Compile
        run: python -m build --no-isolation --sdist

      - name: Check dist
        run: |
          ls -ahl dist
          # Newest .gz archive in dist/; the later steps read its file name
          # from WHL_NAME.
          sdist=$(ls -t dist/*.gz | head -n 1 | xargs basename)
          echo "WHL_NAME=$sdist" >> "$GITHUB_ENV"

          twine check dist/$sdist

      - name: Upload to local
        # Best effort: a failed upload to the build server must not fail the job.
        continue-on-error: true
        env:
          RUN_ID: ${{ github.run_id }}
          UPLOAD_HOST: ${{ needs.check-vm.outputs.ip }}
        run: |
          # sha256sum prints "<digest>  <path>" and the whole line is sent.
          # NOTE(review): confirm the server expects that format.
          sha256=$(sha256sum "dist/${WHL_NAME}")
          response=$(curl -s -F "runid=${RUN_ID}" -F "repo=${repo}" -F "ref=${ref}" -F "sha256=$sha256" -F "file=@dist/${WHL_NAME}" "http://${UPLOAD_HOST}/gpu/whl/upload")
          # A response of 0 is treated as success.
          if [ "$response" -eq 0 ]; then
            echo "UPLOADED=1" >> "$GITHUB_ENV"
          fi

      - name: Upload to artifact
        uses: actions/upload-artifact@v4
        # The artifact copy is only optional when the server upload succeeded.
        continue-on-error: ${{ env.UPLOADED == '1' }}
        with:
          name: ${{ env.WHL_NAME }}
          path: dist/${{ env.WHL_NAME }}

      - name: Upload package to release
        uses: svenstaro/upload-release-action@v2
        if: (github.event_name == 'release' || github.event.inputs.upload_release == 'true') && !cancelled()
        with:
          # Same target repository as the wheel upload in release-zen4, so the
          # sdist and the wheels end up on the same release.
          repo_name: ${{ env.repo }}
          file: dist/${{ env.WHL_NAME }}
          tag: ${{ env.ref }}
          file_glob: true
          overwrite: true

      - name: Upload sdist to pypi
        # `!cancelled()` replaces the implicit success() check, so this step
        # also runs when an earlier step of the job failed.
        if: (github.event_name == 'release' || github.event.inputs.upload_pypi == 'true') && !cancelled()
        env:
          TWINE_USERNAME: "__token__"
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          python -m twine upload dist/*gz