Skip to content

Commit d3777d7

Browse files
committed
Support additional FASTQ sequence designed for UMI file
Changes: - Parse input subworkflow to support a 3rd FASTQ in addition to R1 and R2 - Check that the number of FASTQ files matches the number of read structures
1 parent 0a4172a commit d3777d7

File tree

2 files changed

+35
-5
lines changed

2 files changed

+35
-5
lines changed

assets/schema_input.json

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,20 @@
2727
"pattern": "^\\S+\\.f(ast)?q\\.gz$",
2828
"errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
2929
},
30+
"fastq_3": {
31+
"type": "string",
32+
"format": "file-path",
33+
"exists": true,
34+
"pattern": "^\\S+\\.f(ast)?q\\.gz$",
35+
"errorMessage": "FastQ file for reads 3 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
36+
},
3037
"read_structure": {
3138
"type": "string",
3239
"pattern": "^.*$",
3340
"errorMessage": "Read structure must be provided; For format, see: https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures",
3441
"meta": ["read_structure"]
3542
}
3643
},
37-
"required": ["sample", "fastq_1", "fastq_2", "read_structure"]
44+
"required": ["sample", "fastq_1", "read_structure"]
3845
}
3946
}

subworkflows/local/utils_nfcore_fastquorum_pipeline/main.nf

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,18 @@ workflow PIPELINE_INITIALISATION {
9292
Channel
9393
.fromSamplesheet("input")
9494
.map {
95-
meta, fastq_1, fastq_2 ->
96-
if (!fastq_2) {
97-
return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
98-
} else {
95+
meta, fastq_1, fastq_2, fastq_3 ->
96+
if (fastq_3) {
97+
return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2, fastq_3 ] ]
98+
} else if (fastq_2) {
9999
return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
100+
} else {
101+
return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
100102
}
101103
}
104+
.map {
105+
validateReadStructure(it)
106+
}
102107
.groupTuple()
103108
.map {
104109
validateInputSamplesheet(it)
@@ -109,6 +114,8 @@ workflow PIPELINE_INITIALISATION {
109114
}
110115
.set { ch_samplesheet }
111116

117+
ch_samplesheet.view()
118+
112119
emit:
113120
samplesheet = ch_samplesheet
114121
versions = ch_versions
@@ -163,6 +170,21 @@ def validateInputParameters() {
163170
genomeExistsError()
164171
}
165172

173+
def validateReadStructure(input) {
174+
def id = input[0]
175+
def meta = input[1]
176+
def fastqs = input[2]
177+
178+
def num_fastqs = fastqs.size()
179+
def num_structures = meta.read_structure.tokenize(" ").size()
180+
181+
if (num_fastqs != num_structures) {
182+
error("Please check input samplesheet -> Number of fastq files (${num_fastqs}) does not match the number of read structures (${num_structures}): ${id}, '${meta.read_structure}'")
183+
}
184+
return [ id, meta, fastqs ]
185+
}
186+
187+
166188
//
167189
// Validate channels from input samplesheet
168190
//
@@ -177,6 +199,7 @@ def validateInputSamplesheet(input) {
177199

178200
return [ metas[0], fastqs ]
179201
}
202+
180203
//
181204
// Get attribute from genome config file e.g. fasta
182205
//

0 commit comments

Comments
 (0)