-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcompress_liftover.py
103 lines (100 loc) · 4.1 KB
/
compress_liftover.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import csv
import sys
# Expected spacing, on both the chromosome and the contig axis, between two
# consecutive rows that belong to the same run.
interval = 20


def _parse_rows(reader):
    """Yield (contig_name, contig_pos, chr_name, chr_pos) for each data row.

    Columns 0/1 hold the contig name/position and columns 4/5 hold the
    chromosome name/position. Completely blank lines are skipped.
    """
    for row in reader:
        if not row:
            continue
        yield row[0], int(row[1]), row[4], int(row[5])


def compress_liftover(in_file_name: str, out_file_name: str,
                      step: int = interval) -> None:
    """Run-length compress a tab-separated liftover table.

    Consecutive rows are merged into one run while they stay on the same
    chromosome/contig pair, the chromosome position advances by ``step`` and
    the contig position advances by ``step`` (forward run, ``+``) or by
    ``-step`` (reverse run, ``-``).

    One output line is written per consecutive chromosome/contig pair::

        <chr>\\t<contig>\\t<chr_pos>:<contig_pos>:<count>:<strand>;...

    where every ``;``-terminated entry describes one run by its starting
    positions, its number of rows and its strand.

    Args:
        in_file_name: Path of the tab-separated input table.
        out_file_name: Path of the compressed file to create or overwrite.
        step: Expected distance between consecutive rows of a run.

    Raises:
        OSError: If either file cannot be opened.
        ValueError: If a position column is not an integer.
        IndexError: If a non-blank row has fewer than six columns.
    """
    with open(in_file_name, "r") as in_file, \
            open(out_file_name, "w") as out_file:
        records = _parse_rows(csv.reader(in_file, delimiter="\t"))

        first = next(records, None)
        if first is None:
            # Empty input: leave an empty output file instead of crashing.
            return

        contig_name, pre_contig_pos, chr_name, pre_chr_pos = first
        ctr = 1
        forward_strand = True
        out_file.write(chr_name + "\t" + contig_name + "\t")
        out_file.write(f"{pre_chr_pos}:{pre_contig_pos}:")

        for contig, contig_pos, chrom, chr_pos in records:
            same_pair = chrom == chr_name and contig == contig_name
            chr_step = chr_pos - pre_chr_pos
            contig_step = contig_pos - pre_contig_pos
            expected_contig_step = step if forward_strand else -step

            if (same_pair and chr_step == step
                    and contig_step == expected_contig_step):
                # The row continues the current run.
                ctr += 1
            else:
                # Close the current run ...
                out_file.write(f"{ctr}:{'+' if forward_strand else '-'};")
                if same_pair:
                    # ... and start a new run on the same output line. The
                    # new run is a reverse-strand run only when this row
                    # stepped forward on the chromosome while stepping
                    # backward on the contig; the row that precedes such a
                    # step has already been emitted as part of the run
                    # that was just closed.
                    forward_strand = not (chr_step == step
                                          and contig_step == -step)
                else:
                    # ... and start a new output line for the new pair.
                    out_file.write("\n")
                    chr_name = chrom
                    contig_name = contig
                    forward_strand = True
                    out_file.write(chr_name + "\t" + contig_name + "\t")
                out_file.write(f"{chr_pos}:{contig_pos}:")
                ctr = 1

            pre_chr_pos = chr_pos
            pre_contig_pos = contig_pos

        # Flush the last run; without this the final entry would be left
        # without its count and strand.
        out_file.write(f"{ctr}:{'+' if forward_strand else '-'};")
        out_file.write("\n")


def main(argv):
    """Command-line entry point: ``<output_dir> <in_file> <out_file>``.

    Both file names are resolved relative to ``output_dir``.
    """
    if len(argv) < 4:
        sys.exit("usage: compress_liftover.py <output_dir> <in_file> <out_file>")
    output_dir = argv[1] + "/"
    in_file_name = output_dir + argv[2]
    out_file_name = output_dir + argv[3]
    compress_liftover(in_file_name, out_file_name)


if __name__ == "__main__":
    main(sys.argv)