Skip to content

Commit

Permalink
Merge pull request #87 from sanger-tol/prod_fix
Browse files Browse the repository at this point in the history
Production fixes
  • Loading branch information
muffato authored Feb 29, 2024
2 parents 91efc18 + bba028e commit a27e9ce
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 30 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [[1.2.1](https://github.com/sanger-tol/readmapping/releases/tag/1.2.1)] - [2024-02-29]

### Enhancements & fixes

- Increased the memory requests for reruns of BWAMEM2_MEM and SAMTOOLS_SORMADUP.

## [[1.2.0](https://github.com/sanger-tol/readmapping/releases/tag/1.2.0)] – Norwegian Ridgeback - [2023-12-19]

### Enhancements & fixes
Expand Down
31 changes: 2 additions & 29 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,6 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Increasing the number of CPUs often gives diminishing returns, so we increase it
following a logarithmic curve. Example with base 2:
- 0 < value <= 1: start + step
- 1 < value <= 2: start + 2*step
- 2 < value <= 4: start + 3*step
- 4 < value <= 8: start + 4*step
In order to support re-runs, the step increase may be multiplied by the attempt
number prior to calling this function.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Logarithm of `value` in the given `base`, clamped so that inputs at or below 1
// (whose logarithm would be zero or negative for a base > 1) yield 0 instead.
def positive_log(value, base) {
    // Guard: nothing at or below 1 is allowed to produce a non-positive result
    if (value <= 1) return 0

    // Change-of-base formula: log_base(value) = ln(value) / ln(base)
    def ln_value = Math.log(value)
    def ln_base = Math.log(base)
    return ln_value / ln_base
}

// CPU request that grows with the logarithm (in `base`) of `value`:
// `start` plus `step` times a multiplier that is 1 for value <= 1 and gains one
// each time `value` passes the next power of `base`.
// The result is handed to check_max() with the 'cpus' resource type.
def log_increase_cpus(start, step, value, base) {
    def log_steps = Math.ceil(positive_log(value, base))
    def requested = start + step * (1 + log_steps)
    return check_max(requested, 'cpus')
}


process {

errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
Expand Down Expand Up @@ -82,7 +55,7 @@ process {

withName: 'SAMTOOLS_SORMADUP' {
cpus = { log_increase_cpus(2, 6*task.attempt, 1, 2) }
memory = { check_max( 10.GB + 0.6.GB * Math.ceil( meta.read_count / 100000000 ) * task.attempt, 'memory' ) }
memory = { check_max( 4.GB + 850.MB * log_increase_cpus(2, 6*task.attempt, 1, 2) * task.attempt + 0.6.GB * Math.ceil( meta.read_count / 100000000 ), 'memory' ) }
time = { check_max( 2.h * Math.ceil( meta.read_count / 100000000 ) * task.attempt / log_increase_cpus(2, 6*task.attempt, 1, 2), 'time' ) }
}

Expand Down Expand Up @@ -113,7 +86,7 @@ process {
time = { check_max( 3.h * task.attempt * Math.ceil(positive_log(meta2.genome_size/100000, 10)) * Math.ceil(meta.read_count/1000000000) * 12 / log_increase_cpus(6, 6*task.attempt, meta.read_count/1000000000, 2), 'time' ) }
// Base RAM usage is about 6 times the genome size. Each thread takes an additional 800 MB RAM
// Memory usage of SAMTOOLS_VIEW is negligible.
memory = { check_max( 6.GB * Math.ceil(meta2.genome_size / 1000000000) + 800.MB * log_increase_cpus(6, 6*task.attempt, meta.read_count/1000000000, 2), 'memory' ) }
memory = { check_max( 6.GB * Math.ceil(meta2.genome_size / 1000000000) + 800.MB * task.attempt * log_increase_cpus(6, 6*task.attempt, meta.read_count/1000000000, 2), 'memory' ) }
}

withName: MINIMAP2_ALIGN {
Expand Down
29 changes: 28 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ manifest {
description = 'Pipeline to map reads generated using different sequencing technologies against a genome assembly.'
mainScript = 'main.nf'
nextflowVersion = '!>=22.10.1'
version = '1.2.0'
version = '1.2.1'
doi = '10.5281/zenodo.6563577'
}

Expand Down Expand Up @@ -222,3 +222,30 @@ def check_max(obj, type) {
}
}
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Increasing the number of CPUs often gives diminishing returns, so we increase it
following a logarithmic curve. Example with base 2:
- 0 < value <= 1: start + step
- 1 < value <= 2: start + 2*step
- 2 < value <= 4: start + 3*step
- 4 < value <= 8: start + 4*step
In order to support re-runs, the step increase may be multiplied by the attempt
number prior to calling this function.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Logarithm of `value` in the given `base`, clamped so that inputs at or below 1
// (whose logarithm would be zero or negative for a base > 1) yield 0 instead.
def positive_log(value, base) {
    // Guard: nothing at or below 1 is allowed to produce a non-positive result
    if (value <= 1) return 0

    // Change-of-base formula: log_base(value) = ln(value) / ln(base)
    def ln_value = Math.log(value)
    def ln_base = Math.log(base)
    return ln_value / ln_base
}

// CPU request that grows with the logarithm (in `base`) of `value`:
// `start` plus `step` times a multiplier that is 1 for value <= 1 and gains one
// each time `value` passes the next power of `base`.
// The result is handed to check_max() with the 'cpus' resource type.
def log_increase_cpus(start, step, value, base) {
    def log_steps = Math.ceil(positive_log(value, base))
    def requested = start + step * (1 + log_steps)
    return check_max(requested, 'cpus')
}

0 comments on commit a27e9ce

Please sign in to comment.