Merge pull request #1 from LUMC/init
Initialize miniwdl-slurm plugin
rhpvorderman authored Aug 19, 2022
2 parents 482d627 + ea38919 commit af42211
Showing 6 changed files with 247 additions and 2 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,26 @@
name: Continuous integration

on:
  pull_request:
  push:
    branches:
      - develop
      - master

jobs:
  lint:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.6"
    steps:
      - uses: actions/checkout@v2.3.4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install tox
        run: pip install tox
      - name: Lint
        run: tox -e lint
2 changes: 0 additions & 2 deletions README.md

This file was deleted.

51 changes: 51 additions & 0 deletions README.rst
@@ -0,0 +1,51 @@
miniwdl-slurm
=============
Extends miniwdl to run workflows on SLURM clusters in singularity containers.

This `SLURM backend
<https://miniwdl.readthedocs.io/en/latest/runner_backends.html>`_ plugin for
`miniwdl <https://github.com/chanzuckerberg/miniwdl>`_ runs WDL task containers
by creating a job script that is submitted to a SLURM cluster. When a task
specifies a container, Singularity is used as the container runtime.

Installation
------------

.. code-block::

    pip install git+https://github.com/LUMC/miniwdl-slurm.git

Configuration
-------------
The following information should be set in the `miniwdl configuration
<https://miniwdl.readthedocs.io/en/latest/runner_reference.html#configuration>`_:

.. code-block:: ini

    [scheduler]
    container_backend=slurm_singularity
    # task_concurrency defaults to the number of processors on the system.
    # Since the jobs are submitted to SLURM, that host-based limit is not
    # meaningful here. Higher numbers mean miniwdl has to monitor more
    # processes simultaneously, which might impact performance.
    task_concurrency=200

    [singularity]
    # This plugin wraps the singularity backend. Make sure the settings are
    # appropriate for your cluster.
    exe = ["singularity"]

    # The miniwdl default options contain options to run as a fake root,
    # which is not available on most clusters.
    run_options = [
        "--containall"
    ]

    # Location of the singularity images (optional). If not set, the
    # miniwdl-slurm plugin will use a directory inside $PWD. This location
    # must be reachable from the compute nodes.
    image_cache = "$PWD/miniwdl_singularity_cache"

    [slurm]
    # Extra arguments passed to the srun command (optional).
    extra_args="--partition heavy_users,gpu --comment 'run with miniwdl'"
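
Usage
-----
With the configuration in place, workflows run as usual. A minimal example
follows; ``workflow.wdl`` is a placeholder, and the config path assumes
miniwdl reads the file named by the ``MINIWDL_CFG`` environment variable:

.. code-block::

    export MINIWDL_CFG=$HOME/.config/miniwdl.cfg
    miniwdl run workflow.wdl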
43 changes: 43 additions & 0 deletions setup.py
@@ -0,0 +1,43 @@
# Copyright (c) 2022 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from setuptools import find_packages, setup

with open("README.rst") as fp:
    long_description = fp.read()

setup(
    name="miniwdl-slurm",
    version="v0.1.0-dev",
    description="miniwdl slurm singularity backend",
    long_description=long_description,
    long_description_content_type="text/x-rst",
    author="Leiden University Medical Center",
    author_email="sasc <at> lumc.nl",
    python_requires=">=3.6",
    packages=find_packages('src'),
    package_dir={'': 'src'},
    install_requires=["miniwdl>=1.6.0"],
    entry_points={
        "miniwdl.plugin.container_backend": [
            "slurm_singularity=miniwdl_slurm:SlurmSingularity"
        ],
    },
)
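
The entry_points section above is how miniwdl finds the backend: it scans the
"miniwdl.plugin.container_backend" entry-point group and registers each
advertised class under its name. Below is a hypothetical sketch of that
discovery mechanism, not miniwdl's actual internals (importlib.metadata's
group keyword requires Python 3.10+):

from importlib.metadata import entry_points

# List every container backend advertised in the entry-point group that
# the setup.py above registers "slurm_singularity" under.
for ep in entry_points(group="miniwdl.plugin.container_backend"):
    backend_class = ep.load()  # e.g. miniwdl_slurm.SlurmSingularity
    print(ep.name, "->", backend_class)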
115 changes: 115 additions & 0 deletions src/miniwdl_slurm/__init__.py
@@ -0,0 +1,115 @@
# Copyright (c) 2022 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import logging
import os
import shlex
import sys
from contextlib import ExitStack
from typing import Dict, List

from WDL import Type, Value
from WDL.runtime import config
from WDL.runtime.backend.cli_subprocess import _SubprocessScheduler
from WDL.runtime.backend.singularity import SingularityContainer


class SlurmSingularity(SingularityContainer):
    @classmethod
    def global_init(cls, cfg: config.Loader, logger: logging.Logger) -> None:
        # Set resources to maxsize. The base class (_SubprocessScheduler)
        # looks at the resources of the current host, but since we are
        # dealing with a cluster these limits do not apply.
        cls._resource_limits = {
            "cpu": sys.maxsize,
            "mem_bytes": sys.maxsize,
            "time": sys.maxsize,
        }
        _SubprocessScheduler.global_init(cls._resource_limits)
        # Since we run on the cluster, the images need to be placed in a
        # shared directory. The singularity cache itself cannot be shared
        # across nodes, as it can become corrupted when multiple nodes pull
        # the same image. The solution is to pull the image to a shared
        # directory on the submit node. If no image_cache is given, simply
        # place a folder in the current working directory.
        if cfg.get("singularity", "image_cache") == "":
            cfg.override(
                {"singularity": {
                    "image_cache": os.path.join(os.getcwd(),
                                                "miniwdl_singularity_cache")
                }}
            )
        SingularityContainer.global_init(cfg, logger)

    @classmethod
    def detect_resource_limits(cls, cfg: config.Loader,
                               logger: logging.Logger) -> Dict[str, int]:
        return cls._resource_limits  # type: ignore

    @property
    def cli_name(self) -> str:
        return "slurm_singularity"

    def process_runtime(self,
                        logger: logging.Logger,
                        runtime_eval: Dict[str, Value.Base]) -> None:
        """Any non-default runtime variables can be parsed here."""
        super().process_runtime(logger, runtime_eval)
        if "time_minutes" in runtime_eval:
            time_minutes = runtime_eval["time_minutes"].coerce(Type.Int()).value
            self.runtime_values["time_minutes"] = time_minutes

    def _slurm_invocation(self) -> List[str]:
        # We use srun as this makes the submitted job behave like a local
        # job. This also gives informative exit codes back, including 253
        # for out of memory.
        srun_args = [
            "srun",
            "--job-name", self.run_id,
        ]

        cpu = self.runtime_values.get("cpu", None)
        if cpu is not None:
            srun_args.extend(["--cpus-per-task", str(cpu)])

        memory = self.runtime_values.get("memory_reservation", None)
        if memory is not None:
            # Round to the nearest megabyte.
            srun_args.extend(["--mem", f"{round(memory / (1024 ** 2))}M"])

        time_minutes = self.runtime_values.get("time_minutes", None)
        if time_minutes is not None:
            srun_args.extend(["--time", str(time_minutes)])

        if self.cfg.has_section("slurm"):
            extra_args = self.cfg.get("slurm", "extra_args")
            if extra_args is not None:
                srun_args.extend(shlex.split(extra_args))
        return srun_args

    def _run_invocation(self, logger: logging.Logger, cleanup: ExitStack,
                        image: str) -> List[str]:
        singularity_command = super()._run_invocation(logger, cleanup, image)

        slurm_invocation = self._slurm_invocation()
        slurm_invocation.extend(singularity_command)
        logger.info("Slurm invocation: " + ' '.join(
            shlex.quote(part) for part in slurm_invocation))
        return slurm_invocation
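
For illustration, here is a hedged sketch of the argument list that
_slurm_invocation() would build for a hypothetical task with cpu=4, a
4 GiB memory reservation, time_minutes=60 and the extra_args from the
README example (the job name is invented):

# Illustrative only, not captured output from the plugin:
[
    "srun",
    "--job-name", "call-my_task",      # hypothetical self.run_id
    "--cpus-per-task", "4",
    "--mem", "4096M",                  # 4 GiB rounded to megabytes
    "--time", "60",                    # minutes
    "--partition", "heavy_users,gpu",  # from [slurm] extra_args
    "--comment", "run with miniwdl",
]

_run_invocation() then appends the full singularity command line, so the
task effectively runs as: srun <args> singularity <options> <image> <command>.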
12 changes: 12 additions & 0 deletions tox.ini
@@ -0,0 +1,12 @@
[tox]

[testenv:lint]
deps =
    flake8
    flake8-import-order
    mypy
commands =
    flake8 src setup.py
    mypy src

[flake8]
max-line-length = 88
