Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add generate-SQC-BAM #174

Merged
merged 22 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,6 @@
url = git@github.com:uclahs-cds/pipeline-call-sCNA.git
branch = main
ignore = dirty
[submodule "external/pipeline-generate-SQC-BAM"]
path = external/pipeline-generate-SQC-BAM
url = git@github.com:uclahs-cds/pipeline-generate-SQC-BAM.git
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
---

## [Unreleased]
### Added
+ BAM sample QC pipeline `v1.0.0`
### Changed
+ Call-sSNV: `8.0.0-rc.1` -> `8.0.0`
+ Call-gSNP: `10.0.0-rc.3` -> `10.0.0`
Expand Down
6 changes: 5 additions & 1 deletion config/methods.config
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ methods {
params.version_call_gSV = get_submodule_version('pipeline-call-gSV')
params.version_call_sSV = get_submodule_version('pipeline-call-sSV')
params.version_call_sCNA = get_submodule_version('pipeline-call-sCNA')
params.version_generate_SQC_BAM = get_submodule_version('pipeline-generate-SQC-BAM')
}

set_env = {
Expand Down Expand Up @@ -280,6 +281,7 @@ methods {
'align_DNA': 'align-DNA',
'recalibrate_BAM': 'recalibrate-BAM',
'calculate_targeted_coverage': 'calculate-targeted-coverage',
'generate_SQC_BAM': 'generate-SQC-BAM',
'call_gSNP': 'call-gSNP',
'call_sSNV': 'call-sSNV',
'call_mtSNV': 'call-mtSNV',
Expand Down Expand Up @@ -345,7 +347,8 @@ methods {
'pipeline-call-gSNP': ['dataset_id', 'patient_id', 'output_dir', 'input'],
'pipeline-call-sSNV': ['dataset_id', 'patient_id', 'sample_id', 'output_dir', 'work_dir', 'input'],
'pipeline-call-sSV': ['dataset_id', 'patient_id', 'sample_id', 'output_dir', 'input'],
'pipeline-call-sCNA': ['dataset_id', 'patient_id', 'sample_id', 'output_dir', 'input']
'pipeline-call-sCNA': ['dataset_id', 'patient_id', 'sample_id', 'output_dir', 'input'],
'pipeline-generate-SQC-BAM': ['dataset_id', 'patient_id', 'sample_id', 'output_dir', 'input']
]

pipeline_param_exclusion.each { pipeline, to_exclude ->
Expand Down Expand Up @@ -475,6 +478,7 @@ methods {
resolve_pipeline_ordering = {
List pipelines = [
'calculate-targeted-coverage',
'generate-SQC-BAM',
'call-gSNP',
'call-gSV',
'call-mtSNV',
Expand Down
1 change: 1 addition & 0 deletions config/pipeline_selector.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pipeline_selector {
def dependencies = [
'recalibrate-BAM': ['align-DNA'],
'calculate-targeted-coverage': ['recalibrate-BAM'],
'generate-SQC-BAM': ['recalibrate-BAM'],
'call-gSNP': ['recalibrate-BAM'],
'call-sSNV': ['recalibrate-BAM'],
'call-mtSNV': ['recalibrate-BAM'],
Expand Down
11 changes: 11 additions & 0 deletions config/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ downstream_pipeline_order:
required: false
choices:
- calculate-targeted-coverage
- generate-SQC-BAM
- call-gSNP
- call-gSV
- call-mtSNV
Expand All @@ -71,6 +72,7 @@ requested_pipelines:
default:
- align-DNA
- recalibrate-BAM
- generate-SQC-BAM
- call-gSNP
- call-gSV
- call-mtSNV
Expand All @@ -81,6 +83,7 @@ requested_pipelines:
- align-DNA
- recalibrate-BAM
- calculate-targeted-coverage
- generate-SQC-BAM
- call-gSNP
- call-gSV
- call-mtSNV
Expand Down Expand Up @@ -169,6 +172,14 @@ pipeline_params:
type: 'Namespace'
required: true
help: 'Parameters for call-sCNA'
calculate_targeted_coverage:
type: 'Namespace'
required: true
help: 'Parameters for calculate-targeted-coverage'
generate_SQC_BAM:
type: 'Namespace'
required: true
help: 'Parameters for generate-SQC-BAM'
yashpatel6 marked this conversation as resolved.
Show resolved Hide resolved
input:
type: 'InputNamespace'
required: true
Expand Down
9 changes: 7 additions & 2 deletions config/template.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ params {

sample_mode = 'paired' // Choose from: 'single', 'paired', 'multi'

// Select pipeline(s) to run. Choices: 'align-DNA', 'recalibrate-BAM', 'calculate-targeted-coverage', 'call-gSNP', 'call-mtSNV', 'call-sSNV', 'call-sSV', 'call-gSV'
requested_pipelines = ['align-DNA', 'recalibrate-BAM', 'call-gSNP', 'call-mtSNV', 'call-sSNV', 'call-sSV', 'call-gSV']
// Select pipeline(s) to run. Choices: 'align-DNA', 'recalibrate-BAM', 'generate-SQC-BAM', 'calculate-targeted-coverage', 'call-gSNP', 'call-mtSNV', 'call-sSNV', 'call-sSV', 'call-gSV'
requested_pipelines = ['align-DNA', 'recalibrate-BAM', 'generate-SQC-BAM', 'call-gSNP', 'call-mtSNV', 'call-sSNV', 'call-sSV', 'call-gSV']

// Override conversion to FASTQ and re-alignment with BAM input
override_realignment = false
Expand Down Expand Up @@ -73,6 +73,11 @@ params {
target_depth = false // whether or not to calculate per-base depth in target regions
}

generate_SQC_BAM {
algorithms = ['stats', 'collectwgsmetrics']
reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta'
}

call_gSNP {
reference_fasta = "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta"
bundle_mills_and_1000g_gold_standard_indels_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
Expand Down
1 change: 1 addition & 0 deletions external/pipeline-generate-SQC-BAM
2 changes: 1 addition & 1 deletion main.nf
yashpatel6 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ log.info """\
version: ${workflow.manifest.version}

- input:
input input_csv: ${params.input_csv}
input input_csv: ${params.containsKey('input_csv') ? params.input_csv : 'YAML input used.'}
input project_id: ${params.project_id}

- output:
Expand Down
70 changes: 70 additions & 0 deletions module/generate_SQC_BAM/create_YAML_generate_SQC_BAM.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import org.yaml.snakeyaml.Yaml
/*
* Create input YAML file for the generate-SQC-BAM pipeline.
*
* Input:
*   sample_info: A Map with `normal` and `tumor` keys, each holding the sample records
*     for that state. Each record is read for its `sample` and `bam` entries.
*
* Output:
*   @return A tuple of (patient_id, path to the generated input YAML)
*/
process create_YAML_generate_SQC_BAM {
    publishDir "${params.output_dir}/intermediate/${task.process.replace(':', '/')}-${patient_id}",
        pattern: 'generate_SQC_BAM_input.yaml',
        mode: 'copy'

    input:
        val(sample_info)

    output:
        tuple val(patient_id), path(input_yaml)

    exec:
    // `input_yaml` and `patient_id` are deliberately assigned without `def`: they are
    // referenced by the output declaration and by the publishDir path above.
    input_yaml = 'generate_SQC_BAM_input.yaml'

    // With no tumor samples the normal samples are used; otherwise the tumor samples are.
    // Only consulted in single-sample mode.
    def single_sample_type = sample_info.tumor.isEmpty() ? 'normal' : 'tumor'

    // Turn the sample records of one state into the list of `path` entries for the YAML
    def to_bam_entries = { samples ->
        samples.collect{ ['path': ("${it['bam']}" as String)] }
    }

    def bam_input
    if (params.sample_mode == 'single') {
        // Single mode must contain exactly one sample; its sample ID doubles as the patient ID
        assert sample_info[single_sample_type].sample.size() == 1
        patient_id = sample_info[single_sample_type].sample[0]
        bam_input = [(single_sample_type): to_bam_entries(sample_info[single_sample_type])]
    } else {
        patient_id = params.patient
        bam_input = [
            'normal': to_bam_entries(sample_info.normal),
            'tumor': to_bam_entries(sample_info.tumor)
        ]
    }

    def input_map = [
        'patient_id': patient_id,
        'input': [
            'BAM': bam_input
        ]
    ]

    // withWriter closes the underlying writer even if dumping throws, so the YAML is
    // fully written and the file handle released before the output file is collected.
    new File("${task.workDir}/${input_yaml}").withWriter { writer ->
        new Yaml().dump(input_map, writer)
    }
}
36 changes: 36 additions & 0 deletions module/generate_SQC_BAM/default.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// EXECUTION SETTINGS AND GLOBAL DEFAULTS

// External config files import. DO NOT MODIFY THESE LINES!
includeConfig "${projectDir}/config/default.config"
includeConfig "${projectDir}/config/methods.config"
includeConfig "${projectDir}/nextflow.config"


// Inputs/parameters of the pipeline
// Default parameters for the generate-SQC-BAM pipeline; this config file is supplied
// to the nested pipeline run through the `-c` option in run_generate_SQC_BAM.
params {
algorithms = ['stats', 'collectwgsmetrics'] // Available choices: 'stats', 'collectwgsmetrics', 'bamqc'
// NOTE(review): presumably the reference genome FASTA used by the QC tools - confirm against the pipeline README
reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta'
blcds_registered_dataset = false // Set to true if the output should be registered
save_intermediate_files = true

// SAMtools stats options
samtools_remove_duplicates = false
samtools_stats_additional_options = ''

// Picard CollectWgsMetrics options
cwm_coverage_cap = 1000
cwm_minimum_mapping_quality = 20
cwm_minimum_base_quality = 20
cwm_use_fast_algorithm = false
cwm_additional_options = ''

// Qualimap bamqc options
bamqc_outformat = 'pdf' // Choose from: 'html', 'pdf'
bamqc_additional_options = ''

// Base resource allocation updater
// See README for adding parameters to update the base resource allocations
// (no resource allocation overrides are set by default)
}

// Setup the pipeline config. DO NOT REMOVE THIS LINE!
methods.setup()
43 changes: 43 additions & 0 deletions module/generate_SQC_BAM/run_generate_SQC_BAM.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
include { combine_input_with_params } from '../common.nf'
/*
* Call the generate-SQC-BAM pipeline as a nested Nextflow run.
*
* Input:
*   A tuple of:
*     resolved_id: The identifier used to name the published log directory
*     input_yaml: The input YAML file for the pipeline
*
* Output:
*   The pipeline output files, the task's .command.* log files and a
*   'done' value emitted as `complete`
*/
process run_generate_SQC_BAM {
    cpus params.generate_SQC_BAM.subworkflow_cpus

    publishDir path: "${params.log_output_dir}/process-log",
        mode: "copy",
        pattern: ".command.*",
        saveAs: { log_file -> "${task.process.replace(':', '/')}-${resolved_id}/log${file(log_file).getName()}" }

    publishDir "${params.output_dir}/output",
        mode: "copy",
        pattern: "generate-SQC-BAM-*/*"

    input:
        tuple val(resolved_id), path(input_yaml)

    output:
        file "generate-SQC-BAM-*/*"
        file ".command.*"
        val('done'), emit: complete

    script:
    // Resolve the staged YAML to its real location before merging it with the
    // metapipeline-level arguments for this pipeline
    File resolved_input_yaml = new File(input_yaml.toRealPath().toString())
    String params_to_dump = combine_input_with_params(
        params.generate_SQC_BAM.metapipeline_arg_map,
        resolved_input_yaml
    )
    """
    set -euo pipefail

    printf "${params_to_dump}" > combined_generate_sqc_bam_params.yaml

    nextflow run \
        ${moduleDir}/../../external/pipeline-generate-SQC-BAM/main.nf \
        -params-file combined_generate_sqc_bam_params.yaml \
        --work_dir ${params.work_dir} \
        --output_dir \$(pwd) \
        --dataset_id ${params.project_id} \
        -c ${moduleDir}/default.config
    """
}
56 changes: 56 additions & 0 deletions module/generate_SQC_BAM/workflow.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
Main entry point for generating BAM SQC
*/
include { create_YAML_generate_SQC_BAM } from "${moduleDir}/create_YAML_generate_SQC_BAM"
include { run_generate_SQC_BAM } from "${moduleDir}/run_generate_SQC_BAM" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { mark_pipeline_complete } from "../pipeline_status"

/*
* Main workflow for generating BAM SQC
*
* Input:
*   modification_signal: A channel that is consumed until a 'done' value arrives; the
*     sample inputs themselves are read from params.sample_data once it has completed
*/
workflow generate_SQC_BAM {
    take:
        modification_signal
    main:
        // Hold until the status file of the configured predecessor pipeline appears
        Channel.watchPath( "${params.pipeline_status_directory}/*.complete" )
            .until{ status_file -> status_file.name == "${params.pipeline_predecessor['generate-SQC-BAM']}.complete" }
            .ifEmpty('done')
            .collect()
            .map{ 'done' }
            .set{ pipeline_predecessor_complete }

        // Once both signals have arrived, build one record per sample from
        // params.sample_data and group the records by state
        modification_signal.until{ signal -> signal == 'done' }.ifEmpty('done')
            .mix(pipeline_predecessor_complete)
            .collect()
            .map{ signals ->
                params.sample_data.collect { sample_id, sample_record ->
                    [
                        'patient': sample_record['patient'],
                        'sample': sample_id,
                        'state': sample_record['state'],
                        'bam': sample_record['recalibrate-BAM']['BAM']
                    ]
                }
            }
            .flatten()
            .reduce(['normal': [] as Set, 'tumor': [] as Set]) { grouped, sample ->
                grouped[sample.state] += sample
                return grouped
            }
            .set{ ich }

        create_YAML_generate_SQC_BAM(ich)

        run_generate_SQC_BAM(create_YAML_generate_SQC_BAM.out)

        // Record completion of this pipeline after the run has finished
        run_generate_SQC_BAM.out.complete
            .mix( pipeline_predecessor_complete )
            .collect()
            .map{ signals ->
                mark_pipeline_complete('generate-SQC-BAM')
                return 'done'
            }
            .set{ completion_signal }
}
5 changes: 5 additions & 0 deletions module/metapipeline_DNA.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ include { convert_BAM2FASTQ } from "${moduleDir}/convert_BAM2FASTQ/workflow"
include { align_DNA } from "${moduleDir}/align_DNA/workflow"
include { recalibrate_BAM } from "${moduleDir}/recalibrate_BAM/workflow"
include { calculate_targeted_coverage } from "${moduleDir}/calculate_targeted_coverage/workflow"
include { generate_SQC_BAM } from "${moduleDir}/generate_SQC_BAM/workflow"
include { call_gSNP } from "${moduleDir}/call_gSNP/workflow"
include { call_sSNV } from "${moduleDir}/call_sSNV/workflow"
include { call_mtSNV } from "${moduleDir}/call_mtSNV/workflow"
Expand Down Expand Up @@ -43,6 +44,10 @@ workflow {
calculate_targeted_coverage(recalibrate_BAM.out.recalibrate_sample_data_updated)
}

if (params.generate_SQC_BAM.is_pipeline_enabled) {
generate_SQC_BAM(recalibrate_BAM.out.recalibrate_sample_data_updated)
}

if (params.call_gSNP.is_pipeline_enabled) {
call_gSNP(recalibrate_BAM.out.recalibrate_sample_data_updated)
}
Expand Down
20 changes: 20 additions & 0 deletions nftest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,26 @@ cases:
expect: /hot/software/pipeline/metapipeline-DNA/Nextflow/development/output/calculate-targeted-coverage-1.0.0-rc.2/NA24149/SAMtools-1.16.1/output/NA24149.target_with_enriched_off-target_intervals.bed
script: test/test-calculate-targeted-coverage/assert_bed.sh

- name: test-generate-SQC-BAM
message: Test generate-SQC-BAM pipeline
nf_script: test/test-generate-SQC-BAM/test.nf
nf_config: test/test-generate-SQC-BAM/test.config
verbose: true
skip: true
asserts:
- actual: output/generate-SQC-BAM-*/IlluminaPatient1/SAMtools-*/output/SAMtools-*_IlluminaTest_NA24143_stats.txt
expect: /hot/software/pipeline/metapipeline-DNA/Nextflow/development/output/generate-SQC-BAM-1.0.0/IlluminaPatient1/SAMtools-1.18/output/SAMtools-1.18_IlluminaTest_NA24143_stats.txt
method: md5
- actual: output/generate-SQC-BAM-*/IlluminaPatient1/SAMtools-*/output/SAMtools-*_IlluminaTest_NA24149_stats.txt
expect: /hot/software/pipeline/metapipeline-DNA/Nextflow/development/output/generate-SQC-BAM-1.0.0/IlluminaPatient1/SAMtools-1.18/output/SAMtools-1.18_IlluminaTest_NA24149_stats.txt
method: md5
- actual: output/generate-SQC-BAM-*/IlluminaPatient1/Picard-*/output/Picard-*_IlluminaTest_NA24143_wgs-metrics.txt
expect: /hot/software/pipeline/metapipeline-DNA/Nextflow/development/output/generate-SQC-BAM-1.0.0/IlluminaPatient1/Picard-3.1.0/output/Picard-3.1.0_IlluminaTest_NA24143_wgs-metrics.txt
script: test/test-generate-SQC-BAM/assert_metrics.sh
- actual: output/generate-SQC-BAM-*/IlluminaPatient1/Picard-*/output/Picard-*_IlluminaTest_NA24149_wgs-metrics.txt
expect: /hot/software/pipeline/metapipeline-DNA/Nextflow/development/output/generate-SQC-BAM-1.0.0/IlluminaPatient1/Picard-3.1.0/output/Picard-3.1.0_IlluminaTest_NA24149_wgs-metrics.txt
script: test/test-generate-SQC-BAM/assert_metrics.sh

- name: test-call-gSNP
message: Test call-gSNP with one tumor normal pair
nf_script: test/test-call-gSNP/test.nf
Expand Down
Loading
Loading