Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Save input files and logs #168

Merged
merged 34 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
2b8c295
Put CSVs generated for metapipeline in one directory
yashpatel6 Feb 22, 2024
340594e
Add publishDir rules for saving generated config and for status check …
yashpatel6 Feb 22, 2024
2c0b3f8
Always save CSV for align-DNA
yashpatel6 Feb 22, 2024
feda723
Always save YAML for call-gSNP
yashpatel6 Feb 22, 2024
4f1484a
Always save CSV for call-gSV
yashpatel6 Feb 22, 2024
fee89ad
Always save CSV for call-mtSNV
yashpatel6 Feb 22, 2024
25837cf
Always save YAML for call-sSNV
yashpatel6 Feb 22, 2024
c99e729
Always save YAML for call-sSV
yashpatel6 Feb 22, 2024
57f71d6
Save CSV for convert-BAM2FASTQ
yashpatel6 Feb 22, 2024
c67383a
Always save YAML for recalibrate-BAM
yashpatel6 Feb 22, 2024
3f50718
Always save YAML for targeted-coverage
yashpatel6 Feb 22, 2024
705bcc4
Save logs for convert-BAM2FASTQ CSV creation
yashpatel6 Feb 22, 2024
75f067c
Save logs for call-mtSNV CSV creation
yashpatel6 Feb 22, 2024
cea4d20
Update channel for passing CSV to call-mtSNV
yashpatel6 Feb 22, 2024
b6025cc
Fix log saving in call-gSV
yashpatel6 Feb 22, 2024
4cd1a55
Update channel for passing CSV to call-gSV
yashpatel6 Feb 22, 2024
b96e599
Update CSV channels to use named channels
yashpatel6 Feb 22, 2024
7f6f2c0
Save align-DNA process logs
yashpatel6 Feb 23, 2024
f2d0b27
Save call-gSNP process logs
yashpatel6 Feb 23, 2024
6693890
Save call-gSV logs
yashpatel6 Feb 23, 2024
59eb84a
Save call-mtSNV process logs
yashpatel6 Feb 23, 2024
432fade
Add logs to output for mtsnv
yashpatel6 Feb 23, 2024
acc96b3
Save process logs for call-sSNV
yashpatel6 Feb 23, 2024
08c9f89
Save process logs for convert-BAM2FASTQ
yashpatel6 Feb 23, 2024
e026f6c
Save logs for call-sSV process
yashpatel6 Feb 23, 2024
3dca33c
Save process logs for recalibrate-BAM
yashpatel6 Feb 23, 2024
22fa09f
Save logs for targeted-coverage process
yashpatel6 Feb 23, 2024
d3b6c4b
Fix format for tuple emission
yashpatel6 Feb 26, 2024
c34ff56
Merge main into branch
yashpatel6 Feb 26, 2024
cb6d61e
Save process logs for calculate targeted coverage run
yashpatel6 Feb 26, 2024
b87f40f
Merge branch 'main' of github.com:uclahs-cds/metapipeline-DNA into ya…
yashpatel6 Mar 1, 2024
897fabf
Add logdir for all pipeline processes
yashpatel6 Mar 4, 2024
00009f8
Add log param to tests and update tests for call-sCNA
yashpatel6 Mar 4, 2024
d3c5381
Update CHANGELOG
yashpatel6 Mar 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
+ Align-DNA: 9.0.0 -> 10.0.0-rc.1
+ Sanitize metadata passed to align-DNA
+ Calculate-targeted-coverage: update name from targeted-coverage
+ Save logs and input files for all pipelines

---

Expand Down
18 changes: 15 additions & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ process create_CSV_metapipeline_DNA {
publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process}-${identifier}/log${file(it).getName()}" }
saveAs: { "${task.process}/${identifier}/log${file(it).getName()}" }

publishDir path: "${params.final_output_dir}/intermediate/${task.process}-${identifier}",
enabled: params.save_intermediate_files,
Expand Down Expand Up @@ -116,6 +116,11 @@ process create_CSV_metapipeline_DNA {
* @return pipeline_params_json (file): JSON file containing all pipeline-specific params
*/
process create_config_metapipeline_DNA {
publishDir path: "${params.final_output_dir}/intermediate",
mode: "copy",
pattern: "*.json",
saveAs: { "${task.process}/${identifier}-${file(it).getName()}" }

input:
tuple(
val(patient),
Expand Down Expand Up @@ -157,8 +162,7 @@ process call_metapipeline_DNA {
publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process}-${patient}/log${file(it).getName()}" }

saveAs: { "${task.process}/${patient}-${new StringBuilder(task.hash).insert(2, '-').toString()}/log${file(it).getName()}" }

input:
tuple(
Expand Down Expand Up @@ -205,13 +209,21 @@ process call_metapipeline_DNA {
}

process check_process_status {
publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process}/${file(work_dir).getParent().getFileName()}-${file(work_dir).getFileName()}/log${file(it).getName()}" }

input:
tuple val(work_dir), val(sbatch_ret)

debug true

when params.uclahs_cds_wgs

output:
path(".command.*")

script:
"""
if `echo ${sbatch_ret} | grep -q "Submitted batch job"`
Expand Down
6 changes: 6 additions & 0 deletions module/align_DNA/call_align_DNA.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ process call_align_DNA {
mode: "copy",
pattern: "align-DNA-*/*"

publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${sample}/log${file(it).getName()}" }

input:
tuple(
val(patient),
Expand All @@ -21,6 +26,7 @@ process call_align_DNA {
output:
tuple val(sample), path(output_directory), emit: align_dna_output_directory
file "align-DNA-*/*"
file ".command.*"

script:
output_directory = "align-DNA-*/${sample}"
Expand Down
3 changes: 1 addition & 2 deletions module/align_DNA/create_CSV_align_DNA.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ process create_CSV_align_DNA {
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${params.patient}/${sample}/log${file(it).getName()}" }

publishDir path: "${params.output_dir}/intermediate/${task.process}-${params.patient}/${sample}",
enabled: params.save_intermediate_files,
publishDir path: "${params.output_dir}/intermediate/${task.process.replace(':', '/')}-${params.patient}/${sample}",
mode: "copy",
pattern: "*.csv"

Expand Down
2 changes: 1 addition & 1 deletion module/align_DNA/workflow.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
*/

include { create_CSV_align_DNA } from "./create_CSV_align_DNA" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { call_align_DNA } from "./call_align_DNA"
include { call_align_DNA } from "./call_align_DNA" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { mark_pipeline_complete } from "../pipeline_status"
include { identify_align_dna_outputs } from "./identify_outputs"
include { sanitize_string } from "../../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import org.yaml.snakeyaml.Yaml
*/
process create_YAML_calculate_targeted_coverage {
publishDir "${params.output_dir}/intermediate/${task.process.replace(':', '/')}-${params.patient}/${sample_id}",
enabled: params.save_intermediate_files,
pattern: 'targeted_coverage_input.yaml',
mode: 'copy'

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ include { combine_input_with_params } from '../common.nf'
process run_calculate_targeted_coverage {
cpus params.calculate_targeted_coverage.subworkflow_cpus

// Publish the task's hidden Nextflow log files (.command.*) under the log directory.
// The process name's ':' separators are turned into '/' with no trailing space, so the
// published directory names do not start with a space.
publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${sample_id_for_targeted_coverage}/log${file(it).getName()}" }

publishDir "${params.output_dir}/output",
mode: "copy",
pattern: "calculate-targeted-coverage-*/*"
Expand All @@ -23,6 +28,7 @@ process run_calculate_targeted_coverage {

output:
file "calculate-targeted-coverage-*/*"
file ".command.*"
val('done'), emit: complete

script:
Expand Down
2 changes: 1 addition & 1 deletion module/calculate_targeted_coverage/workflow.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Main entry point for calling calculate-targeted-coverage pipeline
*/
include { create_YAML_calculate_targeted_coverage } from "${moduleDir}/create_YAML_calculate_targeted_coverage"
include { run_calculate_targeted_coverage } from "${moduleDir}/run_calculate_targeted_coverage"
include { run_calculate_targeted_coverage } from "${moduleDir}/run_calculate_targeted_coverage" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { mark_pipeline_complete } from "../pipeline_status"

/*
Expand Down
1 change: 0 additions & 1 deletion module/call_gSNP/create_YAML_call_gSNP.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import org.yaml.snakeyaml.Yaml
*/
process create_YAML_call_gSNP {
publishDir "${params.output_dir}/intermediate/${task.process.replace(':', '/')}-${params.patient}/${patient_id}",
enabled: params.save_intermediate_files,
pattern: 'call_gSNP_input.yaml',
mode: 'copy'

Expand Down
7 changes: 6 additions & 1 deletion module/call_gSNP/run_call_gSNP.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,15 @@ include { combine_input_with_params } from '../common.nf'
process run_call_gSNP {
cpus params.call_gSNP.subworkflow_cpus

publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${sample_id_for_call_gsnp}/log${file(it).getName()}" }

publishDir "${params.output_dir}/output",
mode: "copy",
pattern: "call-gSNP-*/*"


input:
tuple(
val(sample_id_for_call_gsnp),
Expand All @@ -26,6 +30,7 @@ process run_call_gSNP {

output:
file "call-gSNP-*/*"
file ".command.*"
val('done'), emit: complete

script:
Expand Down
2 changes: 1 addition & 1 deletion module/call_gSNP/workflow.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Main entry point for calling call-gSNP pipeline
*/
include { create_YAML_call_gSNP } from "${moduleDir}/create_YAML_call_gSNP"
include { run_call_gSNP } from "${moduleDir}/run_call_gSNP"
include { run_call_gSNP } from "${moduleDir}/run_call_gSNP" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { mark_pipeline_complete } from "../pipeline_status"

/*
Expand Down
6 changes: 3 additions & 3 deletions module/call_gSV/create_CSV_call_gSV.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@ process create_CSV_call_gSV {
publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process}-${patient_id}/${sample_id}/log${file(it).getName()}" }
saveAs: { "${task.process.replace(':', '/')}-${patient_id}/${sample_id}/log${file(it).getName()}" }

publishDir "${params.output_dir}/intermediate/${task.process.replace(':', '/')}-${patient_id}/${sample_id}",
enabled: params.save_intermediate_files,
pattern: 'call_gSV_input.csv',
mode: 'copy'

Expand All @@ -27,7 +26,8 @@ process create_CSV_call_gSV {
)

output:
path(input_csv)
path(input_csv), emit: call_gsv_csv
path(".command.*")

script:
input_csv = "call_gSV_input.csv"
Expand Down
6 changes: 6 additions & 0 deletions module/call_gSV/run_call_gSV.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ include { combine_input_with_params } from '../common.nf'
process run_call_gSV {
cpus params.call_gSV.subworkflow_cpus

publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${task.id}/log${file(it).getName()}" }

publishDir "${params.output_dir}/output",
mode: "copy",
pattern: "call-gSV-*/*"
Expand All @@ -22,6 +27,7 @@ process run_call_gSV {

output:
path "call-gSV-*/*"
path ".command.*"
val('done'), emit: complete

script:
Expand Down
6 changes: 3 additions & 3 deletions module/call_gSV/workflow.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
Main entrypoint for calling call-gSV pipeline
*/

include { create_CSV_call_gSV } from "${moduleDir}/create_CSV_call_gSV"
include { run_call_gSV } from "${moduleDir}/run_call_gSV"
include { create_CSV_call_gSV } from "${moduleDir}/create_CSV_call_gSV" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { run_call_gSV } from "${moduleDir}/run_call_gSV" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { mark_pipeline_complete } from "../pipeline_status"

/*
Expand Down Expand Up @@ -51,7 +51,7 @@ workflow call_gSV {

create_CSV_call_gSV(input_ch_create_CSV)

run_call_gSV(create_CSV_call_gSV.out)
run_call_gSV(create_CSV_call_gSV.out.call_gsv_csv)

run_call_gSV.out.complete
.mix( pipeline_predecessor_complete )
Expand Down
17 changes: 8 additions & 9 deletions module/call_mtSNV/create_CSV_call_mtSNV.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@

process create_CSV_call_mtSNV {
publishDir "${params.output_dir}/intermediate/${task.process.replace(':', '/')}-${params.patient}/${mtsnv_sample_id}",
enabled: params.save_intermediate_files,
pattern: 'call_mtSNV_input.csv',
mode: 'copy'


publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${params.patient}/${mtsnv_sample_id}/log${file(it).getName()}" }

input:
tuple(
val(tumour_id),
Expand All @@ -17,13 +21,8 @@ process create_CSV_call_mtSNV {
)

output:
tuple(
val(tumour_id),
val(normal_id),
path(tumour_BAM),
path(normal_BAM),
path(input_csv)
)
tuple val(tumour_id), val(normal_id), path(tumour_BAM), path(normal_BAM), path(input_csv), emit: call_mtsnv_csv
path(".command.*")

script:
input_csv = 'call_mtSNV_input.csv'
Expand Down
11 changes: 11 additions & 0 deletions module/call_mtSNV/run_call_mtSNV.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ include { combine_input_with_params } from '../common.nf'
process run_call_mtSNV {
cpus params.call_mtSNV.subworkflow_cpus

publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${mtsnv_sample_id}/log${file(it).getName()}" }

publishDir "${params.output_dir}/output",
mode: "copy",
pattern: "call-mtSNV-*/*"
Expand All @@ -18,10 +23,16 @@ process run_call_mtSNV {

output:
path "call-mtSNV-*/*"
path ".command.*"
val('done'), emit: complete

script:
sample_mode = (params.sample_mode == 'single') ? 'single' : 'paired'
if (sample_mode == 'single') {
mtsnv_sample_id = normal_sample
} else {
mtsnv_sample_id = tumor_sample
}
String params_to_dump = combine_input_with_params(params.call_mtSNV.metapipeline_arg_map)
"""
set -euo pipefail
Expand Down
6 changes: 3 additions & 3 deletions module/call_mtSNV/workflow.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
* Module for calling the call-mtSNV pipeline
*/

include { create_CSV_call_mtSNV } from "${moduleDir}/create_CSV_call_mtSNV"
include { run_call_mtSNV } from "${moduleDir}/run_call_mtSNV"
include { create_CSV_call_mtSNV } from "${moduleDir}/create_CSV_call_mtSNV" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { run_call_mtSNV } from "${moduleDir}/run_call_mtSNV" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { mark_pipeline_complete } from "../pipeline_status"

workflow call_mtSNV {
Expand Down Expand Up @@ -71,7 +71,7 @@ workflow call_mtSNV {
}

create_CSV_call_mtSNV(input_ch_create_CSV)
run_call_mtSNV(create_CSV_call_mtSNV.out)
run_call_mtSNV(create_CSV_call_mtSNV.out.call_mtsnv_csv)

run_call_mtSNV.out.complete
.mix( pipeline_predecessor_complete )
Expand Down
2 changes: 1 addition & 1 deletion module/call_sCNA/workflow.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

include { create_YAML_call_sCNA } from "${moduleDir}/create_YAML_call_sCNA"
include { run_call_sCNA } from "${moduleDir}/run_call_sCNA"
include { run_call_sCNA } from "${moduleDir}/run_call_sCNA" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { mark_pipeline_complete } from "../pipeline_status"

/*
Expand Down
1 change: 0 additions & 1 deletion module/call_sSNV/create_YAML_call_sSNV.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import org.yaml.snakeyaml.Yaml
*/
process create_YAML_call_sSNV {
publishDir "${params.output_dir}/intermediate/${task.process.replace(':', '/')}-${params.patient}/${sample_id}",
enabled: params.save_intermediate_files,
pattern: 'call_sSNV_input.yaml',
mode: 'copy'

Expand Down
6 changes: 6 additions & 0 deletions module/call_sSNV/run_call_sSNV.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ process run_call_sSNV {

maxForks 1

publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${sample_id}/log${file(it).getName()}" }

publishDir "${params.output_dir}/output",
mode: "copy",
pattern: "call-sSNV-*/*"
Expand All @@ -30,6 +35,7 @@ process run_call_sSNV {

output:
path "call-sSNV-*/*"
path ".command.*"
val('done'), emit: complete

script:
Expand Down
2 changes: 1 addition & 1 deletion module/call_sSNV/workflow.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Main entry point for calling call-sSNV pipeline
*/
include { create_YAML_call_sSNV } from "${moduleDir}/create_YAML_call_sSNV"
include { run_call_sSNV } from "${moduleDir}/run_call_sSNV"
include { run_call_sSNV } from "${moduleDir}/run_call_sSNV" addParams( log_output_dir: params.metapipeline_log_output_dir )
include { mark_pipeline_complete } from "../pipeline_status"

/*
Expand Down
1 change: 0 additions & 1 deletion module/call_sSV/create_YAML_call_sSV.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ process create_YAML_call_sSV {
saveAs: { "${task.process}-${tumor_id}/log${file(it).getName()}" }

publishDir "${params.output_dir}/intermediate/${task.process.replace(':', '/')}-${tumor_id}",
enabled: params.save_intermediate_files,
pattern: 'call_sSV_input.yaml',
mode: 'copy'

Expand Down
6 changes: 6 additions & 0 deletions module/call_sSV/run_call_sSV.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,17 @@ process run_call_sSV {
mode: "copy",
pattern: "call-sSV-*/*"

publishDir path: "${params.log_output_dir}/process-log",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.replace(':', '/')}-${task.id}/log${file(it).getName()}" }

input:
path(input_yaml)

output:
path "call-sSV-*/*"
path ".command.*"
val('done'), emit: complete

script:
Expand Down
Loading
Loading