miniwdl_aws_studio.cfg
# miniwdl configuration file for use within SageMaker Studio, where the remaining necessary
# infrastructure is detected contextually:
# 1. FSAP matching the Studio domain, EFS, and user profile
# 2. Batch queue with the tag MiniwdlStudioEfsId set to the Studio domain's EFS ID
#
# For info about where to place this file, and other available options:
# https://miniwdl.readthedocs.io/en/latest/runner_reference.html#configuration
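#
# A minimal sketch of activating this file (following the standard miniwdl config search described
# at the URL above): either save it as ~/.config/miniwdl.cfg, or point the MINIWDL_CFG environment
# variable at it before launching runs, e.g.
#   export MINIWDL_CFG=$HOME/miniwdl_aws_studio.cfg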
[scheduler]
container_backend = aws_batch_job
# One `miniwdl run` process will be able to orchestrate this many concurrent AWS Batch jobs. (This
# controls the size of a thread pool, so setting it too high tends to be counterproductive.)
call_concurrency = 100
# Reduced concurrency limit for URI download jobs; since these are typically fast S3 downloads,
# running many concurrently is likely to overstress EFS.
download_concurrency = 10
[file_io]
# Run directories and all input files must reside within this shared directory
root = $HOME
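# (In SageMaker Studio, $HOME typically resides on the domain's shared EFS volume, mounted through
# the access point noted above, so run directories and input files land on shared storage.)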
[task_runtime]
# Default policy to retry spot-terminated jobs (up to three total attempts)
defaults = {
        "docker": "ubuntu:20.04",
        "preemptible": 2
    }
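# Individual WDL tasks can override these defaults in their own runtime sections; a hypothetical
# task excerpt:
#   runtime {
#     docker: "python:3.11-slim"
#     preemptible: 0
#   }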
[call_cache]
# Cache call outputs in EFS folder (valid so long as all referenced input & output files remain
# unmodified on EFS).
dir = $HOME/miniwdl/_CACHE/call
get = true
put = true
[download_cache]
dir = $HOME/miniwdl/_CACHE/download
get = true
put = false
# Disable flock on files in use from the download cache, since EFS imposes low limits on the
# number of concurrent file locks
flock = false
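# With get = true, an input URI that was downloaded previously (for example an illustrative
# s3://bucket/ref.fa) is reused from the cache directory instead of being re-downloaded; with
# put = false, new downloads are not added to the cache.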
[aws]
# Last-resort job timeout for AWS Batch to enforce (seconds; 864000 = 10 days)
job_timeout = 864000
# Internal rate limiting periods (seconds) for AWS Batch API requests
# (may need to be increased if many concurrent workflow runs are planned)
submit_period = 1
describe_period = 1
# Wait this many seconds before retrying a job after a spot instance interruption or other
# retryable failure. Provides a time window for convergence of any "eventually consistent"
# activities from the first attempt (involving e.g. EFS, CloudWatch Logs, etc.).
retry_wait = 60
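# Illustrative end-to-end invocation from a Studio terminal (the workflow path and input name are
# hypothetical); note that input files must reside under the [file_io] root ($HOME) set above:
#   miniwdl run ~/workflows/align.wdl reads=~/data/sample1.fastq.gz --verbose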