Skip to content

Commit

Permalink
Delete unnecessary BAMs (#147)
Browse files Browse the repository at this point in the history
* Add file deletion function

* Use deletion function in recalibrate-BAM

* Fix deletion functions

* Update CHANGELOG

* Move changelog item to specific version

* Add comments for functions

* Version bump in nextflow.config
  • Loading branch information
yashpatel6 authored Nov 22, 2023
1 parent fd33031 commit 09acfc2
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
### Changed
+ Default to alt-aware reference for align-DNA
+ Re-order FASTQ CSV to match order in align-DNA
### Added
+ Deletion step for normal BAMs when running multi-sample patients in paired mode

---

Expand Down
41 changes: 41 additions & 0 deletions module/common.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,44 @@ String identify_file(filepath) {
assert file_found.exists();
return file_found.toRealPath().toString()
}
/**
* Delete a file once it has been copied over to a final destination.
*
* Polls every 5 seconds until identify_file() resolves `output_filepattern`, then polls
* every 5 seconds until the resolved output file has the same size in bytes as `filepath`,
* and only then deletes `filepath`. Both waits are unbounded, so this call blocks until the
* copy appears and reaches full size.
*
* Nothing is deleted when `filepath` does not exist or when identify_file() fails for a
* reason other than "no file found yet".
*
* @param filepath path of the original file to delete
* @param output_filepattern pattern handed to identify_file() to locate the final copy
*/
void delete_file(String filepath, output_filepattern) {
    File expected_file = new File(filepath);

    // A missing source reports a length of 0, which the size-matching loop below could wait on forever
    if (!expected_file.exists()) {
        System.out.println("File to delete does not exist: ${filepath}, skipping deletion.");
        return;
    }

    // File.length() returns a long; keeping it in an Integer would truncate sizes above 2 GiB
    long expected_bytes = expected_file.length(); // The size of the file in bytes

    String output_filepath = '';

    // Wait and find the output file
    Boolean keep_looking = true;
    while (keep_looking) {
        try {
            // Try to access the final output file
            output_filepath = identify_file(output_filepattern);
            System.out.println("Found: ${output_filepath}");
            keep_looking = false;
        } catch (AssertionError e) {
            // NOTE(review): relies on the text of the assertion raised inside identify_file();
            // '[]0' presumably comes from an empty list of matches - confirm against identify_file
            if (e.toString().replace(' ', '').contains('[]0')) {
                // Output file not found (empty list of identified files) so wait and retry
                sleep(5000);
            } else {
                // Error with output file existence, skip deletion
                System.out.println("Failed to find final output file: ${output_filepattern}, not deleting ${filepath}.");
                return;
            }
        }
    }

    File output_file = new File(output_filepath);
    long output_bytes = output_file.length();

    // Wait until final output file matches size in bytes with original file then delete original
    while (output_bytes != expected_bytes) {
        sleep(5000);
        output_bytes = output_file.length();
    }

    // File.delete() signals failure through its return value rather than an exception
    if (!expected_file.delete()) {
        System.out.println("Failed to delete ${filepath}.");
    }
}
12 changes: 10 additions & 2 deletions module/recalibrate_BAM/identify_outputs.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include { identify_file } from '../common'
include { identify_file; delete_file } from '../common'

workflow identify_recalibrate_bam_outputs {
take:
Expand All @@ -7,9 +7,17 @@ workflow identify_recalibrate_bam_outputs {
main:
och_recalibrate_bam.map{ recalibrate_bam_out ->
recalibrate_bam_out[0].normal.each { normal_sample ->
String bam_file = identify_file("${recalibrate_bam_out[1]}/*GATK-*${normal_sample}*.bam");
if (!params.sample_data[normal_sample]['recalibrate-BAM']['BAM']) {
params.sample_data[normal_sample]['recalibrate-BAM']['BAM'] = identify_file("${recalibrate_bam_out[1]}/*GATK-*${normal_sample}*.bam");
params.sample_data[normal_sample]['recalibrate-BAM']['BAM'] = bam_file;
params.sample_data[normal_sample]['recalibrate-BAM']['contamination_table'] = identify_file("${recalibrate_bam_out[2]}/GATK-*${normal_sample}_alone.table");
} else {
// Normal file already found so delete any other normals - only triggered when running multiple samples in paired mode

// Replace the work_dir prefix in the output path with the output_dir prefix for final output
String separator = "/recalibrate-BAM-";
String dir_stripped = recalibrate_bam_out[1].toString().replaceFirst("${params.work_dir}.*${separator}", "");
delete_file(bam_file, "${params.output_dir}/output${separator}${dir_stripped}/*GATK-*${normal_sample}*.bam");
};
};
recalibrate_bam_out[0].tumor.each { tumor_sample ->
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ manifest {
name = 'metapipeline-DNA'
author = ['Yash Patel', 'Chenghao Zhu', 'Helena Winata', 'Alfredo Enrique Gonzalez', 'Nicholas Wang', 'Mohammed Faizal Eeman Mootor']
description = 'Nextflow pipeline to convert BAM to FASTQ, align, call gSNP, call sSNV, call gSV, call sSV and call mtSNV'
version = '5.1.0'
version = '5.2.0'
}

0 comments on commit 09acfc2

Please sign in to comment.