-
Notifications
You must be signed in to change notification settings - Fork 0
/
fastqc_parsing.py
executable file
·58 lines (51 loc) · 1.6 KB
/
fastqc_parsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/local/bin/python3
import argparse
import glob
import os
import re
import subprocess
import zipfile
# Name of the per-sample report found inside every extracted FastQC folder.
_REPORT_NAME = 'fastqc_data.txt'

# A column-header line of a report section: '#' followed by a capital letter.
_HEADER_LINE = re.compile(r'#[A-Z]')


def _find_archives(directory=None):
    """Return the zip archives in *directory* (current directory if empty).

    The result is sorted so that rows are appended to the output tables in a
    reproducible order.
    """
    pattern = os.path.join(directory, '*.zip') if directory else '*.zip'
    return sorted(glob.glob(pattern))


def _extract_archive(archive):
    """Unpack *archive* next to itself and return the expected folder path.

    The returned path is the archive path without its extension; the archive
    is assumed to contain a top-level folder of that name.
    """
    # Extract beside the archive (not into the current directory) so the
    # folder is found where the parsing step looks for it when -d is used.
    destination = os.path.dirname(archive) or os.curdir
    with zipfile.ZipFile(archive) as bundle:
        bundle.extractall(destination)
    return os.path.splitext(archive)[0]


def _split_report(report_path, out_dir=os.curdir):
    """Append each section of one report to a per-section table in *out_dir*.

    A line starting with '>>' opens a section: its last token is kept as the
    filter tag and the remaining tokens, joined by '_', name the output file.
    Every data row is written with two extra tab-separated columns, the value
    of the report's 'Filename' line and the section's filter tag.  Header
    lines ('#' + capital letter) are written, without the '#', only when the
    table was empty at the time the section was opened.

    Raises:
        OSError: if the report or an output table cannot be opened.
        ValueError: if a '>>' line carries no section name.
    """
    source_name = ''
    status = ''
    needs_header = False
    table = None
    try:
        with open(report_path, 'r') as report:
            for raw in report:
                line = raw.strip()
                if not line:
                    continue
                if raw.startswith(('>>END_MODULE', '##')):
                    continue
                if raw.startswith('>>'):
                    tokens = line.split()
                    status = tokens[-1]
                    tokens[0] = tokens[0][2:]
                    section = '_'.join(tokens[:-1])
                    if not section:
                        raise ValueError(
                            'section line without a name in '
                            + report_path + ': ' + line)
                    if table is not None:
                        # Close (and flush) the previous section's table so
                        # later size checks see everything written so far.
                        table.close()
                        table = None
                    table_path = os.path.join(out_dir, section)
                    needs_header = (not os.path.exists(table_path)
                                    or os.path.getsize(table_path) == 0)
                    table = open(table_path, 'a')
                elif raw.startswith('Filename'):
                    # Keep everything after the key so names containing
                    # spaces are not truncated.
                    fields = line.split(None, 1)
                    source_name = fields[1] if len(fields) > 1 else ''
                elif table is None:
                    # Content before the first section has nowhere to go.
                    continue
                elif _HEADER_LINE.match(raw):
                    # NOTE(review): a section with several '#' lines gets all
                    # of them written as headers for the first report only;
                    # confirm this is the intended handling.
                    if needs_header:
                        table.write(line[1:] + '\tFilename\tFastQC_Filter\n')
                else:
                    table.write(
                        line + '\t' + source_name + '\t' + status + '\n')
    finally:
        if table is not None:
            table.close()


def main(argv=None):
    """Extract every FastQC zip archive and split the reports by section.

    Args:
        argv: command-line arguments without the program name; ``None``
            means ``sys.argv[1:]``.

    Output tables are created in (or appended to in) the current directory.
    """
    parser = argparse.ArgumentParser(
        description=(
            'Scrapes folders of fastqc runs and extracts '
            'important info for large analyses. '
            'Just run in directory holding the fastqc folders. '
            'Will use the folder names to name the files.'
        )
    )
    parser.add_argument(
        '-d', '--directory',
        help='Give a directory to crawl that has the fastqc zip files')
    args = parser.parse_args(argv)

    archives = _find_archives(args.directory)
    # Unpack everything first, then parse, keeping the original two passes.
    report_dirs = [_extract_archive(archive) for archive in archives]
    for report_dir in report_dirs:
        _split_report(os.path.join(report_dir, _REPORT_NAME))


if __name__ == '__main__':
    main()