-
Notifications
You must be signed in to change notification settings - Fork 1.1k
142 lines (138 loc) · 5.95 KB
/
pythonapp-gpu.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# Jenkinsfile.monai-premerge
# Workflow header: name, triggering events and the concurrency policy.
name: premerge-gpu

on:
  # quick tests for pull requests and the releasing branches
  push:
    branches:
      - main
      - releasing/*
  pull_request:
    types: [opened, synchronize, closed]

concurrency:
  # automatically cancel the previously triggered workflows when there's a newer version
  # (runs are grouped by pull request number, or by git ref when the event has no pull request)
  group: build-gpu-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  # Quick GPU test job: one leg per matrix environment, each run inside the
  # container image named by that environment's `include` entry.
  GPU-quick-py3:  # GPU with full dependencies
    # if: ${{ github.repository == 'Project-MONAI/MONAI' && github.event.pull_request.merged != true }}
    if: ${{ false }}  # disable self-hosted job project-monai/monai#7039
    strategy:
      matrix:
        # Every name listed here needs exactly one matching `include` entry below:
        # that entry supplies the container image (`base`) and the pip arguments (`pytorch`).
        # NOTE(review): "PT19+CUDA114DOCKER" used to be listed here without any `include`
        # entry, which left both values empty for that leg (pip was then invoked without a
        # requirement). It is dropped until a base image is chosen; re-add the name
        # together with its `include` entry.
        environment:
          - "PT110+CUDA111"
          - "PT112+CUDA118DOCKER"
          - "PT113+CUDA116"
          - "PT210+CUDA121DOCKER"
          - "PT230+CUDA124DOCKER"
        include:
          # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes
          - environment: "PT110+CUDA111"
            pytorch: "torch==1.10.2 torchvision==0.11.3 --extra-index-url https://download.pytorch.org/whl/cu111"
            base: "nvcr.io/nvidia/cuda:11.1.1-devel-ubuntu18.04"
          - environment: "PT112+CUDA118DOCKER"
            # 22.09: 1.13.0a0+d0d6b1f
            pytorch: "-h"  # we explicitly set pytorch to -h to avoid pip install error
            base: "nvcr.io/nvidia/pytorch:22.09-py3"
          - environment: "PT113+CUDA116"
            pytorch: "torch==1.13.1 torchvision==0.14.1"
            base: "nvcr.io/nvidia/cuda:11.6.1-devel-ubuntu18.04"
          - environment: "PT210+CUDA121DOCKER"
            # 23.08: 2.1.0a0+29c30b1
            pytorch: "-h"  # we explicitly set pytorch to -h to avoid pip install error
            base: "nvcr.io/nvidia/pytorch:23.08-py3"
          # This entry previously reused the PT210+CUDA121DOCKER name, so its values replaced
          # the 23.08 ones above and the 23.08 image was never exercised. It now has its own name.
          # NOTE(review): the CUDA124 part of the name follows the NVIDIA release notes for 24.03 - confirm.
          - environment: "PT230+CUDA124DOCKER"
            # 24.03: 2.3.0a0+40ec155e58.nv24.3
            pytorch: "-h"  # we explicitly set pytorch to -h to avoid pip install error
            base: "nvcr.io/nvidia/pytorch:24.03-py3"
    container:
      image: ${{ matrix.base }}
      options: --gpus all --env NVIDIA_DISABLE_REQUIRE=true  # workaround for unsatisfied condition: cuda>=11.6
    runs-on: [self-hosted, linux, x64, common]
    steps:
      - uses: actions/checkout@v4
      - name: apt install
        if: github.event.pull_request.merged != true
        run: |
          apt-get update
          apt-get install -y wget
          # Only the two legs built on the plain nvcr.io/nvidia/cuda images get the manual
          # Python 3.9 + pip setup below; the other legs skip it.
          if [ "${{ matrix.environment }}" = "PT110+CUDA111" ] || \
            [ "${{ matrix.environment }}" = "PT113+CUDA116" ]
          then
            PYVER=3.9 PYSFX=3 DISTUTILS=python3-distutils && \
            apt-get update && apt-get install -y --no-install-recommends \
              curl \
              pkg-config \
              python$PYVER \
              python$PYVER-dev \
              python$PYSFX-pip \
              $DISTUTILS \
              rsync \
              swig \
              unzip \
              zip \
              zlib1g-dev \
              libboost-locale-dev \
              libboost-program-options-dev \
              libboost-system-dev \
              libboost-thread-dev \
              libboost-test-dev \
              libgoogle-glog-dev \
              libjsoncpp-dev \
              cmake \
              git && \
            rm -rf /var/lib/apt/lists/* && \
            export PYTHONIOENCODING=utf-8 LC_ALL=C.UTF-8 && \
            rm -f /usr/bin/python && \
            rm -f /usr/bin/python$(echo $PYVER | cut -c1-1) && \
            ln -s /usr/bin/python$PYVER /usr/bin/python && \
            ln -s /usr/bin/python$PYVER /usr/bin/python$(echo $PYVER | cut -c1-1) && \
            curl -O https://bootstrap.pypa.io/get-pip.py && \
            python get-pip.py && \
            rm get-pip.py;
          fi
      # Only fires for the PT19+CUDA114DOCKER environment, which is currently not part of the
      # matrix (see the note there); kept so the leg can be restored without further changes.
      - name: Optional Cupy dependency (cuda114)
        if: matrix.environment == 'PT19+CUDA114DOCKER'
        run: echo "cupy-cuda114" >> requirements-dev.txt
      - name: Install dependencies
        if: github.event.pull_request.merged != true
        run: |
          which python
          python -m pip install --upgrade pip wheel
          # fixes preinstalled ruamel_yaml error from the docker image
          rm -rf $(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")/ruamel*
          rm -rf $(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")/llvmlite*  #6377
          # matrix.pytorch is either a pinned torch/torchvision spec or "-h" (pip prints help and exits)
          python -m pip install ${{ matrix.pytorch }}
          python -m pip install -r requirements-dev.txt
          python -m pip list
      - name: Run quick tests (GPU)
        if: github.event.pull_request.merged != true
        run: |
          git clone --depth 1 \
            https://github.com/Project-MONAI/MONAI-extra-test-data.git /MONAI-extra-test-data
          export MONAI_EXTRA_TEST_DATA="/MONAI-extra-test-data"
          nvidia-smi
          # random start delay of 0-290 seconds, in 10 second steps
          export LAUNCH_DELAY=$(python -c "import numpy; print(numpy.random.randint(30) * 10)")
          echo "Sleep $LAUNCH_DELAY"
          sleep $LAUNCH_DELAY
          # the last output line of tests.utils is used as the device list
          export CUDA_VISIBLE_DEVICES=$(coverage run -m tests.utils | tail -n 1)
          echo $CUDA_VISIBLE_DEVICES
          # on any failing command, kill leftover python processes (including the background loop below)
          trap 'if pgrep python; then pkill python; fi;' ERR
          # background loop holding one tensor on cuda:0 and one on cuda:1
          # (presumably to keep the selected devices occupied while the tests run - confirm)
          python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
          python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
          python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
          python -c "import monai; monai.config.print_config()"
          # build for the current self-hosted CI Tesla V100
          BUILD_MONAI=1 TORCH_CUDA_ARCH_LIST="7.0" ./runtests.sh --build --disttests
          ./runtests.sh --quick --unittests
          if [ "${{ matrix.environment }}" = "PT113+CUDA116" ]; then
            # test the clang-format tool downloading once
            coverage run -m tests.clang_format_utils
          fi
          coverage xml --ignore-errors
          # stop the background GPU loop started above
          if pgrep python; then pkill python; fi
        shell: bash
      - name: Upload coverage
        if: ${{ github.head_ref != 'dev' && github.event.pull_request.merged != true }}
        uses: codecov/codecov-action@v4
        with:
          files: ./coverage.xml