-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_dia.nf
172 lines (128 loc) · 5.14 KB
/
main_dia.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
/* MPC-Nextflow-Quality-Control-Workflow --> MPC-QuaC-Workflow
This is the QC-Workflow which generates various statistics from measured Thermo Fisher data ("RAW-files").
Here we extract XICs, identifications and various other information and save them to a database.
The <TODO_VIS>.nf-Script can then be used to generate various plots from the extracted data for inspection.
Example call:
nextflow run \
-resume main.nf \
--main_raw_spectra_folder <Path_to_folder_of_raws>/raws \
--main_fasta_file <Path_to_FASTA_file>.fasta
*/
// Include all the needed workflows from the sub-workflows
// Extend this to also extend the QC-Workflow
// PROJECT_DIR is taken from workflow.projectDir so that every include path below is built from the project directory.
// NOTE(review): ident_via_comet and execute_pia are included but not invoked by the entry workflow in this file
// (it emulates empty PSM results instead) - confirm whether these two includes are still needed here.
PROJECT_DIR = workflow.projectDir
include {convert_to_mgf_mzml} from PROJECT_DIR + '/convert_to_mgf_mzml.nf'
include {get_various_mzml_infos} from PROJECT_DIR + '/get_mzml_chromatogram_and_more.nf'
include {ident_via_comet} from PROJECT_DIR + '/identification_via_comet.nf'
include {execute_pia} from PROJECT_DIR + '/pia.nf'
include {retrieve_spikeins} from PROJECT_DIR + '/retrieve_spike_ins.nf' // We could also consider exposing params.spk_spike_ins, however it is always fixed for our ISA-standard!
include {get_features} from PROJECT_DIR + '/get_features_in_raws.nf'
include {get_custom_headers} from PROJECT_DIR + '/get_custom_columns_from_file_directly.nf'
// Each script has its own UNIQUE-param-attribute and can be fine-tuned from this main.nf-script.
// The required params are also exposed in this script and are listed below:
// Parameters required for the standalone execution of this main-nextflow script
params.main_raw_spectra_folder = "" // Folder that is globbed (top level only) for the "*.raw" / "*.d" inputs of the entry workflow
params.main_outdir = "$PWD/results" // Base output directory; the processes in this file publish their results into <main_outdir>/qc_results
// Here are some optional parameters which can be set if needed
params.main_is_isa = true // If true, the entry workflow additionally runs retrieve_spikeins, i.e. the ISA-specific XIC extraction (NOTE: FASTA has to contain the SpikeIns too!)
// MAIN WORKFLOW
workflow {
    // Retrieve the RAW spectra: every "*.raw" or "*.d" entry directly inside the input folder.
    // type: "any" makes the glob return directories as well as plain files.
    rawspectra = Channel.fromPath(params.main_raw_spectra_folder + "/*.{raw,d}", type: "any")

    // Convert to the needed formats: out[0] --> .mgf | out[1] --> .mzML (peak-picked)
    convert_to_mgf_mzml(rawspectra)

    // Retrieve mzML statistics
    get_various_mzml_infos(convert_to_mgf_mzml.out[1])

    /* No identification: an empty .mzTab per input stands in for the PSM results */
    // NOTE(review): rawspectra and emulate_psm_results.out are handed to the steps below as two
    // separate channels; this presumably relies on both emitting in the same order - confirm.
    emulate_psm_results(rawspectra)

    // Specific to ISA: do the XIC extraction if specified.
    // The spike-in channel defaults to an empty channel so that the concatenation below
    // also works when the extraction is switched off (retrieve_spikeins.out only exists
    // after retrieve_spikeins has actually been invoked).
    spikein_csvs = Channel.empty()
    if (params.main_is_isa) {
        retrieve_spikeins(rawspectra, emulate_psm_results.out)
        spikein_csvs = retrieve_spikeins.out
    }

    // Run feature finding and statistics
    get_features(convert_to_mgf_mzml.out[1], emulate_psm_results.out)

    // Get Thermo-specific information from the raw files
    get_custom_headers(rawspectra)

    // Gather the CSVs of all steps into one list (order: mzML infos, spike-ins, features, custom headers)
    combined_csvs = get_various_mzml_infos.out.collect().concat(
        spikein_csvs.collect(),
        get_features.out.map { it[1] }.collect(), // only the second element of each emitted tuple is passed on
        get_custom_headers.out.collect()
    ).collect()

    // Concatenate to one large csv
    combine_output_to_table(combined_csvs)

    // Visualize the results
    visualize_results(combine_output_to_table.out)
}
// Creates an empty "<baseName>.mzTab" for the given raw spectra file.
// It stands in for real PSM results, since this workflow performs no identification.
process emulate_psm_results {
    input:
    file raw_spectra

    output:
    file "${raw_spectra.baseName}.mzTab"

    script:
    """
    # We just emulate no PSM results
    # (quoted, so that a base name containing whitespace still yields exactly one file)
    touch "${raw_spectra.baseName}.mzTab"
    """
}
// Merges all given CSV files into a single "quality_control.csv" via unify_csv_tables.py
// and publishes (copies) it to <main_outdir>/qc_results.
process combine_output_to_table {
    publishDir "${params.main_outdir}/qc_results", mode:'copy'

    input:
    file(input_csv_files)

    output:
    file("quality_control.csv")

    script:
    """
    # Build a comma-separated list of all staged input CSVs
    CONCAT_CSVS=""
    for file in $input_csv_files
    do
        CONCAT_CSVS+="\$file,"
    done

    # Drop the trailing comma with a parameter expansion (no word splitting, no external tools)
    CONCAT_CSVS="\${CONCAT_CSVS%,}"

    unify_csv_tables.py -out_csv quality_control.csv -input_csvs "\$CONCAT_CSVS"
    """
}
// Runs QC_visualization.py on the combined CSV, writing into the task directory ("."),
// and publishes (copies) the declared outputs to <main_outdir>/qc_results.
process visualize_results {
    publishDir path: "${params.main_outdir}/qc_results", mode: 'copy'

    input:
    file complete_csv

    output:
    // Everything matching these patterns, plus the "fig13_ionmaps" entry, is collected as output
    file "*.json"
    file "*.html"
    file "*.csv"
    path "fig13_ionmaps"

    script:
    """
    QC_visualization.py -csv_file $complete_csv -output "."
    """
}
// // TODO Useful bits:
// params.help = false
// if(params.help) {
// println(""" \nusage : ~/nextflow main.nf [operators] --input inputFiles
// example : ~/nextflow main.nf --input \"path/fastas/\"
// operators: --help calls this help option\n """)
// System.exit(0)
// } else {
// dir= workflow.projectDir.getParent() + "/results/"
// include {msgfplus_buildsa; msgfplus_search_mgf} from PROJECT_DIR + "/identification_via_msgfplus.nf"
// workflow msgfplus{
// // Get all MGF files which should be identified
// // mgfs = Channel.fromPath(converter.out)
// // Get FASTA-file
// fasta_file = Channel.fromPath(params.fasta_file)
// // Get Modification Parameters file
// modifications_file = Channel.fromPath(params.search_parameter_file)
// take: data
// main:
// // Build indexed fasta for MSGFPLUS
// msgfplus_buildsa(fasta_file)
// // Combined channel replicated the indexed fasta for each MGF to be reused
// combined_channel = fasta_file
// .combine(modifications_file)
// .combine(data)
// .combine(msgfplus_buildsa.out.toList())
// // Start search
// msgfplus_search_mgf(combined_channel)
// msgfplus_search_mgf.out.view()
// emit:
// msgfplus_search_mgf.out
// }