-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex_fixer.py
119 lines (103 loc) · 4.42 KB
/
index_fixer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
import re
import sys

import click
from flowcell_parser.classes import SampleSheetParser
def nuc_compliment(nuc):
    """Return the complementary base for a single upper-case nucleotide.

    'A' pairs with 'T' and 'C' pairs with 'G'. Any other value (including
    lower-case letters) terminates the program through ``sys.exit`` with
    an error message naming the offending value.
    """
    base_pairs = (('A', 'T'), ('T', 'A'), ('C', 'G'), ('G', 'C'))
    for base, partner in base_pairs:
        if nuc == base:
            return partner
    # No pairing found: abort, since a bad base would corrupt the index.
    sys.exit("Critical error. Unknown nucleotide found: {}.".format(nuc))
# Two 4-12 base indexes joined by '-' or '+', as stored in the single
# 'Index' column of a dual-indexed hiseq samplesheet.
_DUAL_INDEX_RE = re.compile(r'([ATCG]{4,12})[-+]([ATCG]{4,12})')


def _reverse_complement(seq):
    """Return the reverse complement of the index sequence ``seq``.

    Every base goes through ``nuc_compliment``, which terminates the
    program on any character other than A, T, C or G.
    """
    return "".join(nuc_compliment(nuc) for nuc in reversed(seq))


def _split_dual_index(value):
    """Split a combined hiseq index into its (index1, index2) parts.

    Exits with a readable message when ``value`` does not start with two
    indexes separated by '-' or '+', instead of failing later with an
    AttributeError on a missing regex match.
    """
    found = _DUAL_INDEX_RE.match(value)
    if found is None:
        sys.exit("Critical error. Expected a dual index, found: {}.".format(value))
    return found.group(1), found.group(2)


# Command-line entry point.
#
# Reads the samplesheet at ``path`` with SampleSheetParser, edits the index
# columns of every row in place according to the flags, and writes the result
# of ``generate_clean_samplesheet()`` to '<name>_redemux.csv' in the current
# working directory. '<name>' is the input file's base name without extension
# for hiseq/hiseqx and the fixed string 'SampleSheet' for miseq.
#
# The option's Python name is pinned to 'ss' explicitly: click >= 7 would
# otherwise derive 'sampleswap' from the first long option and fail to call
# main(). click.STRING replaces the Python 2 only ``unicode`` builtin.
@click.command()
@click.option('--path', required=True, help='Path to the Samplesheet. E.g. ~/fc/161111_M01320_0095_000000000-AWE6P.csv')
@click.option('--swap', is_flag=True, help='Swaps index 1 with 2 and vice versa.')
@click.option('--rc1', is_flag=True, help='Exchanges index 1 for its reverse compliment.')
@click.option('--rc2', is_flag=True, help='Exchanges index 2 for its reverse compliment.')
@click.option('--platform', required=True, type=click.Choice(['hiseq', 'miseq', 'hiseqx']), help="Run platform ('hiseq', 'miseq', 'hiseqx')")
@click.option('--sampleswap', '--ss', 'ss', multiple=True, type=(click.STRING, click.STRING), help='Swap index between sample pairs. Use one --ss per pair.')
def main(path, swap, rc1, rc2, platform, ss):
    ss_reader = SampleSheetParser(path)
    # NOTE(review): assumes .data is a list of per-sample dict rows and that
    # generate_clean_samplesheet() renders those same (mutated) rows - confirm.
    ss_data = ss_reader.data
    if not ss_data:
        sys.exit("Critical error. No samples found in samplesheet: {}.".format(path))

    # Work out the index column names and whether the sheet is single-indexed.
    # The decision is made from the first row only.
    index2 = None
    single = True
    if platform == "hiseq":
        # hiseq keeps both indexes in one column, separated by '-' or '+'.
        index1 = 'Index'
        if re.search('[-+]', ss_data[0][index1]):
            single = False
    elif platform == "miseq":
        index1 = 'index'
        index2 = 'index2'
        if index2 in ss_data[0]:
            single = False
    elif platform == "hiseqx":
        index1 = 'index1'
        index2 = 'index2'
        single = False

    if single:
        # Sanity check
        if rc2 or swap:
            sys.exit("Single index. Cannot change index 2, nor swap indexes")
        # Reverse compliment; rows without a leading 4-12 base index are
        # left untouched.
        if rc1:
            for row in ss_data:
                index_in = re.match('([ATCG]{4,12})', row[index1])
                if index_in:
                    row[index1] = _reverse_complement(index_in.group(1))
    else:
        # Reverse compliment
        if rc1 or rc2:
            for row in ss_data:
                if platform == "hiseq":
                    first, second = _split_dual_index(row[index1])
                    # Transform both halves before re-joining so that
                    # --rc1 and --rc2 given together are both applied.
                    if rc1:
                        first = _reverse_complement(first)
                    if rc2:
                        second = _reverse_complement(second)
                    row[index1] = '{}-{}'.format(first, second)
                else:
                    # miseq / hiseqx: use the per-platform column names
                    # rather than a hard-coded 'index1' (miseq uses 'index').
                    if rc1:
                        row[index1] = _reverse_complement(row[index1])
                    if rc2:
                        row[index2] = _reverse_complement(row[index2])
        # Swap indexes
        if swap:
            for row in ss_data:
                if platform == "hiseq":
                    first, second = _split_dual_index(row[index1])
                    row[index1] = '{}-{}'.format(second, first)
                else:
                    row[index1], row[index2] = row[index2], row[index1]

    # Rearrange samples
    if ss:
        # Need to catch all samples in a list prior to writing, then dump
        # them in corrected order
        sys.exit("Sample Swap isn't implemented yet.")

    redemux_ss = ss_reader.generate_clean_samplesheet()
    if platform == "hiseq" or platform == "hiseqx":
        # Base name without extension; unlike the old regex this also works
        # for relative paths and for names containing '-'.
        filename = os.path.splitext(os.path.basename(path))[0]
    else:
        filename = "SampleSheet"
    with open('{}_redemux.csv'.format(filename), 'w') as fh_out:
        fh_out.write(redemux_ss)
# Run the click command only when executed as a script, not on import.
if __name__ == '__main__':
    main()