forked from AprilJack/igc_script_backup
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGTFsubtractor.java
More file actions
executable file
·98 lines (93 loc) · 2.56 KB
/
GTFsubtractor.java
File metadata and controls
executable file
·98 lines (93 loc) · 2.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
/**
 * Command-line tool that, for every annotation in a first GTF, keeps only its
 * last exon (strand-aware), trims that exon by the regions of overlapping
 * annotations from a second GTF on the same strand/chromosome key, and prints
 * whatever is left to standard output as a {@code transcript} line plus an
 * {@code exon} line.
 *
 * <p>The keys of {@code GTF.byChr} are treated as "strand character followed by
 * chromosome name": the first character is written to the strand column and a
 * leading {@code '+'} selects plus-strand handling.
 */
public class GTFsubtractor {

    /**
     * Runs the whole subtraction and writes the result to {@code System.out};
     * progress and usage information go to {@code System.err}.
     *
     * @param args {@code args[0]} is the GTF to subtract from, {@code args[1]}
     *             the GTF to subtract; with fewer than two arguments the usage
     *             text is printed and the JVM exits with status 1
     */
    public GTFsubtractor(String[] args) {
        if (args.length < 2) {
            System.err.println("Will subtract regions from a GTF based on another GTF. Only the last exon will be kept.");
            System.err.println("\nUsage: GTFsubtractor file1.gtf.gz file2.gtf.gz > file1-file2.gtf");
            System.exit(1);
        }
        GTF gtf1 = new GTF(args[0]);
        GTF gtf2 = new GTF(args[1]);
        int count = 0;
        System.err.println("Outputing non-overlapping 3'UTR");
        for (String chrStr : gtf1.byChr.keySet()) {
            boolean plus = chrStr.startsWith("+");
            for (Annotation a : gtf1.byChr.get(chrStr)) {
                Region last = findLastExon(a, plus);
                // An annotation without any region has nothing to trim or print;
                // skipping it avoids dereferencing a null "last" below.
                if (last != null) {
                    subtractOverlaps(a, last, gtf2, chrStr, plus);
                    // Only emit the region if something is left of it.
                    if (last.start < last.end) {
                        printRegion(a, last, chrStr.charAt(0));
                    }
                }
                count++;
                // One progress dot per 1000 processed annotations.
                if (count % 1000 == 0) {
                    System.err.print(".");
                }
            }
        }
        System.err.println();
    }

    /**
     * Picks the 3'-most region of an annotation: the one with the greatest
     * {@code end} on the plus strand, the one with the smallest {@code start}
     * otherwise. On ties the first such region in iteration order wins.
     *
     * @return the selected region, or {@code null} if the annotation has none
     */
    private static Region findLastExon(Annotation a, boolean plus) {
        Region last = null;
        for (Region r : a.positions) {
            if (last == null || (plus && last.end < r.end) || (!plus && last.start > r.start)) {
                last = r;
            }
        }
        return last;
    }

    /**
     * Trims {@code last} in place by every region of every annotation in
     * {@code gtf2} (under the same {@code chrStr} key) that overlaps {@code a}.
     * Stops as soon as {@code last} has been reduced to zero length.
     *
     * <p>Note that {@code last} is one of {@code a}'s own regions, so the
     * trimming is visible to later {@code b.overlaps(a)} calls.
     */
    private static void subtractOverlaps(Annotation a, Region last, GTF gtf2, String chrStr, boolean plus) {
        if (gtf2.byChr.get(chrStr) == null) {
            return; // nothing to subtract for this strand/chromosome key
        }
        for (Annotation b : gtf2.byChr.get(chrStr)) {
            if (b.overlaps(a) > 0) {
                for (Region r : b.positions) {
                    if (last.overlaps(r.start, r.end) > 0 && trim(last, r, plus)) {
                        break; // fully covered, no point looking at more regions
                    }
                }
                if (last.start == last.end) {
                    return;
                }
            }
        }
    }

    /**
     * Shrinks {@code last} so that it no longer extends into {@code r}; the
     * caller has already established that the two overlap.
     *
     * <p>When {@code r} lies strictly inside {@code last} both ends could be
     * trimmed; the plus strand then moves {@code start} up to {@code r.end},
     * the minus strand moves {@code end} down to {@code r.start}.
     *
     * @return {@code true} if {@code r} covers {@code last} completely, in which
     *         case {@code last} is collapsed to zero length
     */
    private static boolean trim(Region last, Region r, boolean plus) {
        boolean cutsStart = r.end >= last.start && r.end < last.end;
        boolean cutsEnd = r.start > last.start && r.start <= last.end;
        if (cutsStart && (plus || !cutsEnd)) {
            last.start = r.end;
        } else if (cutsEnd) {
            last.end = r.start;
        } else {
            // Neither boundary of r falls inside last: r covers it entirely.
            last.start = last.end;
            return true;
        }
        return false;
    }

    /**
     * Prints {@code region} twice in GTF column layout, once with feature
     * {@code transcript} and once with feature {@code exon}. The chromosome
     * column is {@code a.chr} without its first character.
     */
    private static void printRegion(Annotation a, Region region, char strand) {
        String chromosome = a.chr.substring(1);
        String tail = "\t" + region.start + "\t" + region.end + "\t1000\t" + strand + "\t.\t" + a.annotations;
        System.out.println(chromosome + "\tGTFSubtractor\ttranscript" + tail);
        System.out.println(chromosome + "\tGTFSubtractor\texon" + tail);
    }

    /**
     * Program entry point; all work happens in the constructor.
     *
     * @param args command-line arguments, see {@link #GTFsubtractor(String[])}
     */
    public static void main(String[] args) {
        new GTFsubtractor(args);
    }
}