-
Notifications
You must be signed in to change notification settings - Fork 2
/
guppy_bcsplit.py
executable file
·99 lines (82 loc) · 2.79 KB
/
guppy_bcsplit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/python3
import sys
import os
import time
import csv
import getopt
from Bio import SeqIO
def touch(path):
    """Create *path* as an empty file and set its timestamps to now.

    The file is opened in write mode, so any existing content is
    truncated. While the file is still open, its access and modification
    times are set to the current time via ``os.utime``.
    """
    handle = open(path, 'w')
    try:
        # times=None means "use the current time" for both atime and mtime.
        os.utime(path, None)
    finally:
        handle.close()
def main(argv):
    """Split a FASTQ file into one file per barcode.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-b/--barcode`` (tab-separated barcode table whose
            header contains the word "barcode"; column 0 is used as the read
            id and column 1 as the barcode), ``-f/--fastq`` (input FASTQ),
            ``-p/--prefix`` (output prefix; files are written to
            ``<prefix>_<barcode>.fastq``), the optional ``-s/--summary``
            (file receiving per-barcode read counts) and ``-h/--help``.

    Exits with status 2 when the options cannot be parsed, when any of the
    three mandatory options is missing, or when the barcode table has no
    header containing "barcode". Exits with status 0 after printing the
    usage text for ``-h``.
    """
    # Local import: keeps the function self-contained without touching the
    # file-level import block.
    from contextlib import ExitStack

    usage = 'usage: guppy_bcsplit.py -b <barcode_file> -f <fastq_file> -p <your_prefix> -s <summary_file (optional)>'
    parameter_error = 'parameter error. ' + usage

    # None marks "not supplied", so that each mandatory option is checked
    # individually rather than by counting how many options were seen
    # (repeating one option three times used to pass the check).
    barcodes_file = None
    fastq_file = None
    prefix = None
    summary_file = ''

    try:
        opts, _ = getopt.getopt(
            argv, "hb:f:p:s:",
            ["help", "barcode=", "fastq=", "prefix=", "summary="])
    except getopt.GetoptError:
        print(parameter_error)
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-b", "--barcode"):
            barcodes_file = arg
        elif opt in ("-f", "--fastq"):
            fastq_file = arg
        elif opt in ("-p", "--prefix"):
            prefix = arg
        elif opt in ("-s", "--summary"):
            summary_file = arg
        else:
            # Explicit raise instead of ``assert False`` so the guard
            # survives ``python -O``.
            raise AssertionError("unhandled option")

    if barcodes_file is None or fastq_file is None or prefix is None:
        print(parameter_error)
        sys.exit(2)

    starttime = time.time()
    read_to_barcode = {}
    my_stats = {}

    # newline='' is what the csv module documents for files handed to
    # csv.reader.
    with open(barcodes_file, newline='') as csvfile:
        barcode_rows = csv.reader(csvfile, delimiter='\t')
        # A completely empty file yields no header row; treat it like any
        # other invalid barcode file instead of leaking StopIteration.
        header = ' '.join(next(barcode_rows, []))
        if 'barcode' not in header:
            print('Barcode file seems to be not valid.')
            sys.exit(2)
        for row in barcode_rows:
            if len(row) < 2:
                # Blank or truncated line: nothing to assign.
                continue
            read_id, barcode = row[0], row[1]
            my_stats[barcode] = my_stats.get(barcode, 0) + 1
            read_to_barcode[read_id] = barcode

    if summary_file != '':
        total_reads = 0
        with open(summary_file, "w") as summ_file_handle:
            for barcode in sorted(my_stats):
                summ_file_handle.write(barcode + ':' + str(my_stats[barcode]) + '\n')
                total_reads += my_stats[barcode]
            summ_file_handle.write('Total reads:' + str(total_reads))

    unassigned_reads = 0
    with ExitStack() as stack:
        # Open every output file exactly once. Mode 'w' creates or truncates
        # it, so barcodes that receive no reads still get an empty file, and
        # the file is no longer reopened for every single read.
        # NOTE(review): this holds one open handle per distinct barcode;
        # assumes that number stays well below the OS file-descriptor limit.
        outputs = {
            barcode: stack.enter_context(
                open(prefix + '_' + barcode + '.fastq', 'w'))
            for barcode in my_stats
        }
        for fastq_rec in SeqIO.parse(fastq_file, "fastq"):
            barcode = read_to_barcode.get(fastq_rec.id)
            if barcode is None:
                # Read is absent from the barcode table: skip it rather than
                # aborting with a KeyError halfway through the run.
                unassigned_reads += 1
                continue
            SeqIO.write(fastq_rec, outputs[barcode], "fastq")

    if unassigned_reads:
        print('Warning: {0} read(s) in the FASTQ file had no entry in the '
              'barcode file and were skipped.'.format(unassigned_reads),
              file=sys.stderr)

    endtime = time.time()
    finished = 'Finished sorting in {0:.0f} seconds.'.format(endtime - starttime)
    if summary_file != '':
        with open(summary_file, "a+") as summ_file_handle:
            # Append the elapsed time, then rewind to echo the whole summary.
            summ_file_handle.write('\n' + finished)
            summ_file_handle.seek(0)
            print(summ_file_handle.read())
    else:
        print(finished)
# Script entry point: when the file is executed directly, hand the
# command-line arguments (program name dropped) to main().
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)