# Skip to content
#
# Fix moe h100
#
# Fix moe h100 #10
#
# Workflow file for this run

# Builds the kernels wheel on every pull request and uploads it as a
# workflow artifact named "deepspeed-kernels-whl".
name: Build Kernels
on:
  pull_request:
jobs:
  build:
    runs-on: [self-hosted, cpu]
    steps:
      # Submodules are fetched recursively as part of the checkout.
      - uses: actions/checkout@v4
        with:
          submodules: 'recursive'
      # Log the toolchain versions so build failures can be correlated with
      # the runner's Python / nvcc / torch installation.
      - name: environment
        run: |
          python --version
          nvcc --version
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
      - name: build kernels
        # Explicit bash: GitHub then runs the script with `-eo pipefail`, so a
        # failure inside the sed pipeline below aborts the step.
        shell: bash
        run: |
          # Epoch timestamp makes every build string unique (".dev<seconds>").
          ts=$(date +%s)
          DS_KERNELS_MAKE_JOBS=10 \
          DS_KERNELS_BUILD_STRING=".dev${ts}" \
          CUDA_ARCH_LIST="80;86;89;90" \
          python setup.py bdist_wheel
          # Rewrite the wheel tags: "cpXY-cpXY" -> "py3-none" and
          # "linux" -> "manylinux1". Globbing (instead of parsing `ls`) keeps
          # this correct when dist/ holds more than one wheel.
          for whl in dist/*.whl; do
            # An unmatched glob stays literal; treat that as "no wheel built".
            if [ ! -e "$whl" ]; then
              echo "no wheel found in dist/" >&2
              exit 1
            fi
            fname=$(basename "$whl")
            nname=$(printf '%s\n' "$fname" | sed 's/cp[0-9]\+-cp[0-9]\+/py3-none/' | sed 's/linux/manylinux1/')
            # mv refuses to move a file onto itself, so skip no-op renames.
            if [ "$fname" != "$nname" ]; then
              mv "dist/$fname" "dist/$nname"
            fi
          done
          ls -al
          # Also list dist/ so the renamed wheel shows up in the log.
          ls -al dist
      # upload-artifact v3 has been retired on github.com; v4 is the
      # supported replacement.
      - uses: actions/upload-artifact@v4
        with:
          name: deepspeed-kernels-whl
          path: dist/*.whl