
Submission fasta reroute ick4 #197

Merged (7 commits) on Apr 10, 2024
23 changes: 20 additions & 3 deletions bin/submission_create.py
@@ -471,6 +471,17 @@ def create_genbank_zip(submission_name, submission_files_dir):
    while not os.path.isfile(os.path.join(submission_files_dir, submission_name + ".zip")):
        time.sleep(10)

+# Detect multiple contig fasta
+def is_multicontig_fasta(fasta):
+    headers = set()
+    with open(fasta, 'r') as file:
+        for line in file:
+            if line.startswith('>'):
+                headers.add(line.strip())
+                if len(headers) > 1:
+                    return True
+    return False
+
# Run Table2asn to generate sqn file for submission
def create_genbank_table2asn(submission_dir, submission_name, submission_files_dir, gff_file=None):
    submission_status = "processed-ok"
@@ -481,16 +492,22 @@ def create_genbank_table2asn(submission_dir, submission_name, submission_files_dir, gff_file=None):
print("Downloading Table2asn.", file=sys.stdout)
download_table2asn(table2asn_dir=table2asn_dir)
# Command to generate table2asn submission file
command = [table2asn_dir, "-t", os.path.join(submission_files_dir, "authorset.sbt"), "-i", os.path.join(submission_files_dir, "sequence.fsa"), \
"-src-file", os.path.join(submission_files_dir, "source.src"), "-locus-tag-prefix", get_gff_locus_tag(gff_file), \
"-o", os.path.join(submission_files_dir, submission_name + ".sqn")]
fasta = os.path.join(submission_files_dir, "sequence.fsa")
command = [table2asn_dir, "-t", os.path.join(submission_files_dir, "authorset.sbt"), "-i", fasta, \
"-src-file", os.path.join(submission_files_dir, "source.src"), "-locus-tag-prefix", get_gff_locus_tag(gff_file), \
"-o", os.path.join(submission_files_dir, submission_name + ".sqn")]
if is_multicontig_fasta(fasta):
command.append("-M")
command.append("n")
command.append("-Z")
if os.path.isfile(os.path.join(submission_files_dir, "comment.cmt")):
command.append("-w")
command.append( os.path.join(submission_files_dir, "comment.cmt"))
if gff_file is not None:
command.append("-f")
command.append(os.path.join(submission_dir, gff_file))
print("Running Table2asn.", file=sys.stdout)
print(command)
proc = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd = os.path.join(os.path.dirname(os.path.abspath(__file__))))
if proc.returncode != 0:
print("Table2asn-Error", file=sys.stderr)
7 changes: 4 additions & 3 deletions bin/validate_metadata.py
@@ -360,7 +360,7 @@ def validate_main(self):
            'meta_core_grade': self.meta_core_grade},
        errors = {'sample_error_msg': self.sample_error_msg, 'sra_msg': self.sra_msg,
                  'illumina_error_msg': self.illumina_error_msg, 'nanopore_error_msg': self.nanopore_error_msg,
-                 'list_of_sample_errors': self.list_of_sample_errors,},
+                  'list_of_sample_errors': self.list_of_sample_errors,},
        valid_sample_num = self.valid_sample_num,
        sample_info = sample_info,
        sample_flag = True,
@@ -688,6 +688,7 @@ def check_instruments(self, instrument_type):
            self.meta_illumina_grade = False

        # check if the SRA file exists for the first file path
+        path_failed = False
        if instrument_type == 'illumina':
            if self.sample_info["illumina_library_layout"].tolist()[0] == 'paired':
                paths = [file_path1, file_path2]
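
The diff does not show where path_failed is read later, so the rationale here is an assumption: initialising the flag before the instrument-type branches guarantees it exists on every code path and avoids an UnboundLocalError when neither branch assigns it. A hypothetical, simplified sketch:

def check_sra_paths(instrument_type, paths):
    # Hypothetical stand-in for check_instruments(); only the flag handling is shown.
    path_failed = False  # initialise up front so every branch can rely on it
    if instrument_type == 'illumina':
        path_failed = not all(p.endswith('.fastq.gz') for p in paths)
    elif instrument_type == 'nanopore':
        path_failed = len(paths) == 0
    # Without the initialisation above, an unexpected instrument_type would make
    # this read raise UnboundLocalError.
    return path_failed

print(check_sra_paths('pacbio', ['run1.bam']))  # -> False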
@@ -1008,8 +1009,8 @@ def read_custom_fields_file(self):
        # save contents to dictionary
        for field_name, field_data in data.items():
            self.custom_fields_dict[field_name] = {
-               'type': field_data['type'],
-               'samples': field_data['samples'],
+                'type': field_data['type'],
+                'samples': field_data['samples'],
                'replace_empty_with': field_data['replace_empty_with'],
                'new_field_name': field_data['new_field_name']
            }
6 changes: 3 additions & 3 deletions conf/modules.config
@@ -20,23 +20,23 @@ process {

    withName: METADATA_VALIDATION {
        publishDir = [
-            path: { "${params.output_dir}/${params.validation_outputs}" },
+            path: { "${params.output_dir}/${params.val_output_dir}" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }

    withName: BAKTA {
        publishDir = [
-            path: { "${params.output_dir}/${params.bakta_outputs}" },
+            path: { "${params.output_dir}/${params.bakta_output_dir}" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }

    withName: VADR_POST_CLEANUP {
        publishDir = [
-            path: { "${params.output_dir}/${params.vadr_outputs}" },
+            path: { "${params.output_dir}/${params.vadr_output_dir}" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
2 changes: 0 additions & 2 deletions modules/local/metadata_validation/main.nf
@@ -14,8 +14,6 @@ process METADATA_VALIDATION {
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"

publishDir "$params.output_dir", mode: 'copy', overwrite: params.overwrite_output

input:
path meta_path

2 changes: 0 additions & 2 deletions modules/local/post_bakta_annotation/main.nf
@@ -9,8 +9,6 @@ process BAKTA_POST_CLEANUP {
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"

// publishDir "$params.output_dir", mode: 'copy', overwrite: params.overwrite_output

input:
path bakta_results
path meta_path
3 changes: 0 additions & 3 deletions modules/local/post_vadr_annotation/main.nf
@@ -11,9 +11,6 @@ process VADR_POST_CLEANUP {
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'staphb/tostadas:latest' : 'staphb/tostadas:latest' }"


publishDir "$params.output_dir", mode: 'copy', overwrite: params.overwrite_output

input:
path vadr_outputs
tuple val(meta), path meta_path
1 change: 1 addition & 0 deletions subworkflows/local/bakta.nf
@@ -32,6 +32,7 @@ workflow RUN_BAKTA {

    emit:
        gff3 = BAKTA.out.gff3
+        fna = BAKTA.out.fna
}


23 changes: 14 additions & 9 deletions workflows/tostadas.nf
@@ -86,12 +86,6 @@ workflow TOSTADAS {
    // Create initial submission channel
    submission_ch = metadata_ch.join(reads_ch)

-    // print error if they provide gff and annotation flag
-    // if ( !params.annotation && params.genbank ) {
-    //     // todo: make an error msg that follows the rest of the code protocol
-    //     throw new Exception("Cannot submit to GenBank without assembly and annotation files")
-    // }
-
    // check if the user wants to skip annotation or not
    if ( params.annotation ) {
        if ( params.virus && !params.bacteria ) {
@@ -110,7 +104,7 @@
            }

            // set up submission channels
-            submission_ch = submission_ch.join(repeatmasker_gff_ch) // meta.id, fasta, fastq1, fastq2, gff
+            submission_ch = submission_ch.join(repeatmasker_gff_ch) // meta.id, tsv, fasta, fastq1, fastq2, gff
        }

        // run vadr processes
@@ -126,7 +120,7 @@
                    meta['id'] = it.getSimpleName().replaceAll('_reformatted', '')
                    [ meta, it ]
                }
-                submission_ch = submission_ch.join(vadr_gff_ch) // meta.id, fasta, fastq1, fastq2, gff
+                submission_ch = submission_ch.join(vadr_gff_ch) // meta.id, tsv, fasta, fastq1, fastq2, gff
            }
        }
        if ( params.bacteria ) {
@@ -140,9 +134,20 @@
                .flatten()
                .map {
                    meta = [id:it.getSimpleName()]
+                    //meta = it.getSimpleName()
                    [ meta, it ]
                }
+            bakta_fasta_ch = RUN_BAKTA.out.fna
+                .flatten()
+                .map {
+                    meta = [id:it.getSimpleName()]
+                    //meta = it.getSimpleName()
+                    [ meta, it ]
+                }
-            submission_ch = submission_ch.join(bakta_gff_ch) // meta.id, fasta, fastq1, fastq2, gff
+            submission_ch = submission_ch.join(bakta_gff_ch) // meta.id, tsv, fasta, fastq1, fastq2, gff
+            submission_ch = submission_ch.map { meta, tsv, _, fq1, fq2, gff -> [meta, tsv, fq1, fq2, gff] } // drop original fasta
+            submission_ch = submission_ch.join(bakta_fasta_ch) // join annotated fasta
+            submission_ch = submission_ch.map { meta, tsv, fq1, fq2, gff, fasta -> [meta, tsv, fasta, fq1, fq2, gff] } // meta.id, tsv, annotated fasta, fastq1, fastq2, gff
        }
    }
}