[Auto3DSeg] Add mlflow support in autorunner. #13025

Workflow file for this run

.github/workflows/pythonapp-gpu.yml at 0024d62

	# Jenkinsfile.monai-premerge
	# Workflow header: display name and the events that trigger a run.
	name: premerge-gpu

	on:
	  # quick tests for pull requests and the releasing branches
	  push:
	    branches:
	      - main
	      - releasing/*
	  pull_request:
	    # NOTE(review): `closed` is included alongside `opened`/`synchronize`;
	    # the job's steps are individually gated on
	    # `github.event.pull_request.merged != true` -- confirm this pairing
	    # is still intended.
	    types: [opened, synchronize, closed]

	concurrency:
	  # automatically cancel the previously triggered workflows when there's a newer version
	  # The group key is the pull-request number when the event carries one,
	  # otherwise the git ref, so runs are grouped per PR or per branch.
	  group: build-gpu-${{ github.event.pull_request.number || github.ref }}
	  cancel-in-progress: true

	jobs:
	  # Single job: builds MONAI and runs the quick/unit tests inside a GPU
	  # container, once per entry of the `environment` matrix below.
	  GPU-quick-py3:  # GPU with full dependencies
	    # if: ${{ github.repository == 'Project-MONAI/MONAI' && github.event.pull_request.merged != true }}
	    if: ${{ false }}  # disable self-hosted job project-monai/monai#7039
	    strategy:
	      matrix:
	        environment:
	          - "PT19+CUDA114DOCKER"
	          - "PT110+CUDA111"
	          - "PT112+CUDA118DOCKER"
	          - "PT113+CUDA116"
	          - "PT210+CUDA121DOCKER"
	        # Each `include` entry attaches a pip argument string (`pytorch`)
	        # and a container image (`base`) to the matching environment.
	        include:
	          # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes
	          - environment: PT19+CUDA114DOCKER
	            # 21.10: 1.10.0a0+0aef44c
	            pytorch: "-h"  # we explicitly set pytorch to -h to avoid pip install error
	            base: "nvcr.io/nvidia/pytorch:21.10-py3"
	          - environment: PT110+CUDA111
	            pytorch: "torch==1.10.2 torchvision==0.11.3 --extra-index-url https://download.pytorch.org/whl/cu111"
	            base: "nvcr.io/nvidia/cuda:11.1.1-devel-ubuntu18.04"
	          - environment: PT112+CUDA118DOCKER
	            # 22.09: 1.13.0a0+d0d6b1f
	            pytorch: "-h"  # we explicitly set pytorch to -h to avoid pip install error
	            base: "nvcr.io/nvidia/pytorch:22.09-py3"
	          - environment: PT113+CUDA116
	            pytorch: "torch==1.13.1 torchvision==0.14.1"
	            base: "nvcr.io/nvidia/cuda:11.6.1-devel-ubuntu18.04"
	          - environment: PT210+CUDA121DOCKER
	            # 23.08: 2.1.0a0+29c30b1
	            pytorch: "-h"  # we explicitly set pytorch to -h to avoid pip install error
	            base: "nvcr.io/nvidia/pytorch:23.08-py3"
	    container:
	      image: ${{ matrix.base }}
	      options: --gpus all --env NVIDIA_DISABLE_REQUIRE=true  # workaround for unsatisfied condition: cuda>=11.6
	    runs-on: [self-hosted, linux, x64, common]
	    steps:
	      - uses: actions/checkout@v4
	      # Installs wget everywhere; on the two plain CUDA base images it also
	      # installs Python 3.8, build tooling and pip, and repoints
	      # /usr/bin/python and /usr/bin/python3 at that interpreter.
	      - name: apt install
	        if: github.event.pull_request.merged != true
	        run: |
	          apt-get update
	          apt-get install -y wget

	          # The matrix value is quoted so the test stays well-formed even if
	          # the expansion is empty or contains whitespace.
	          if [ "${{ matrix.environment }}" = "PT110+CUDA111" ] || \
	            [ "${{ matrix.environment }}" = "PT113+CUDA116" ]
	          then
	            PYVER=3.8 PYSFX=3 DISTUTILS=python3-distutils && \
	            apt-get update && apt-get install -y --no-install-recommends \
	              curl \
	              pkg-config \
	              python$PYVER \
	              python$PYVER-dev \
	              python$PYSFX-pip \
	              $DISTUTILS \
	              rsync \
	              swig \
	              unzip \
	              zip \
	              zlib1g-dev \
	              libboost-locale-dev \
	              libboost-program-options-dev \
	              libboost-system-dev \
	              libboost-thread-dev \
	              libboost-test-dev \
	              libgoogle-glog-dev \
	              libjsoncpp-dev \
	              cmake \
	              git && \
	            rm -rf /var/lib/apt/lists/* && \
	            export PYTHONIOENCODING=utf-8 LC_ALL=C.UTF-8 && \
	            rm -f /usr/bin/python && \
	            rm -f /usr/bin/python`echo $PYVER | cut -c1-1` && \
	            ln -s /usr/bin/python$PYVER /usr/bin/python && \
	            ln -s /usr/bin/python$PYVER /usr/bin/python`echo $PYVER | cut -c1-1` && \
	            curl -O https://bootstrap.pypa.io/get-pip.py && \
	            python get-pip.py && \
	            rm get-pip.py;
	          fi
	      # Appends the CUDA 11.4 cupy package to the dev requirements for the
	      # one environment that matches.
	      - if: matrix.environment == 'PT19+CUDA114DOCKER'
	        name: Optional Cupy dependency (cuda114)
	        run: echo "cupy-cuda114" >> requirements-dev.txt
	      - name: Install dependencies
	        if: github.event.pull_request.merged != true
	        run: |
	          which python
	          python -m pip install --upgrade pip wheel
	          # fixes preinstalled ruamel_yaml error from the docker image
	          rm -rf $(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")/ruamel*
	          rm -rf $(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")/llvmlite* #6377
	          # For the *DOCKER environments matrix.pytorch is "-h", so this
	          # line only prints pip's help instead of installing anything.
	          python -m pip install ${{ matrix.pytorch }}
	          python -m pip install -r requirements-dev.txt
	          python -m pip list
	      - name: Run quick tests (GPU)
	        if: github.event.pull_request.merged != true
	        run: |
	          git clone --depth 1 \
	            https://github.com/Project-MONAI/MONAI-extra-test-data.git /MONAI-extra-test-data
	          export MONAI_EXTRA_TEST_DATA="/MONAI-extra-test-data"
	          nvidia-smi
	          # Sleep a random multiple of 10 seconds between 0 and 290
	          # (presumably to stagger jobs sharing the runner -- confirm).
	          export LAUNCH_DELAY=$(python -c "import numpy; print(numpy.random.randint(30) * 10)")
	          echo "Sleep $LAUNCH_DELAY"
	          sleep $LAUNCH_DELAY
	          # The last line printed by tests.utils becomes the visible device list.
	          export CUDA_VISIBLE_DEVICES=$(coverage run -m tests.utils | tail -n 1)
	          echo $CUDA_VISIBLE_DEVICES
	          # On any failing command, kill leftover python processes
	          # (including the background one started below).
	          trap 'if pgrep python; then pkill python; fi;' ERR
	          # Background process that allocates a tensor on cuda:0 and cuda:1
	          # and loops forever (presumably to keep both devices occupied
	          # for the duration of the tests -- confirm).
	          python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
	          python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
	          python -c 'import torch; print(torch.rand(5, 3, device=torch.device("cuda:0")))'
	          python -c "import monai; monai.config.print_config()"
	          # build for the current self-hosted CI Tesla V100
	          BUILD_MONAI=1 TORCH_CUDA_ARCH_LIST="7.0" ./runtests.sh --build --disttests
	          ./runtests.sh --quick --unittests
	          if [ "${{ matrix.environment }}" = "PT113+CUDA116" ]; then
	            # test the clang-format tool downloading once
	            coverage run -m tests.clang_format_utils
	          fi
	          coverage xml --ignore-errors
	          # Normal-exit cleanup of the background python process.
	          if pgrep python; then pkill python; fi
	        # bash is required: the script uses $'...' quoting and an ERR trap.
	        shell: bash
	      - name: Upload coverage
	        if: ${{ github.head_ref != 'dev' && github.event.pull_request.merged != true }}
	        uses: codecov/codecov-action@v3
	        with:
	          files: ./coverage.xml

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[Auto3DSeg] Add mlflow support in autorunner. #13025

Workflow file

[Auto3DSeg] Add mlflow support in autorunner. #13025

Jobs

Run details

Workflow file for this run