forked from GMOD/jbrowse
-
Notifications
You must be signed in to change notification settings - Fork 3
/
draw-basepair-track.pl
executable file
·201 lines (158 loc) · 5.53 KB
/
draw-basepair-track.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/usr/bin/env perl
=head1 NAME
draw-basepair-track.pl - make a track that draws semicircular diagrams of DNA base pairing
=head1 USAGE
bin/draw-basepair-track.pl \
--gff <GFF file> \
[ --out <JSON directory> ] \
[ --tracklabel <track identifier> ] \
[ --key <human-readable track name> ] \
[ --bgcolor <R,G,B> ] \
[ --fgcolor <R,G,B> ] \
[ --thickness <line thickness> ] \
[ --width <tile width> ] \
[ --height <tile height> ] \
[ --nolinks ]
=head1 OPTIONS
=over 4
=item --out <val>
Data directory to write to. Defaults to C<data/>.
=item --trackLabel <val>
Unique name for the track. Defaults to the wiggle filename.
=item --key <val>
Human-readable name for the track. Defaults to be the same as the
C<--trackLabel>.
=item --bgcolor <R,G,B>
Background color for the image track. Defaults to C<255,255,255>,
which is white.
=item --fgcolor <R,G,B>
Foreground color for the track, i.e. the color of the lines that are
drawn. Defaults to C<0,255,0>, which is bright green.
=item --thickness <pixels>
Thickness in pixels of the drawn lines. Defaults to 2.
=item --width <pixels>
Width in pixels of each image tile. Defaults to 2000.
=item --height <pixels>
Height in pixels of the image track. Defaults to 100.
=item --nolinks
If passed, do not use filesystem hardlinks to compress duplicate
tiles.
=back
=cut
use strict;
use warnings;
use FindBin qw($Bin);
use lib "$Bin/../lib";
use JBlibs;
use File::Basename;
use Getopt::Long;
use List::Util 'max';
use Pod::Usage;
use POSIX qw (abs ceil);
use ImageTrackRenderer;
my ($path, $trackLabel, $key, $cssClass);
my $outdir = "data";
my $tiledir = "tiles";
my $fgColor = "0,255,0";
my $bgColor = "255,255,255";
my $tileWidth = 2000;
my $trackHeight = 100;
my $thickness = 2;
my $nolinks = 0;
my $help;
GetOptions( "gff=s" => \$path,
"out=s" => \$outdir,
"tracklabel|trackLabel=s" => \$trackLabel,
"key=s" => \$key,
"bgcolor=s" => \$bgColor,
"fgcolor=s" => \$fgColor,
"width=s" => \$tileWidth,
"height=s" => \$trackHeight,
"thickness=s" => \$thickness,
"nolinks" => \$nolinks,
"help|h|?" => \$help,
) or pod2usage();
pod2usage( -verbose => 2 ) if $help;
pod2usage( 'must provide a --gff file' ) unless defined $path;
unless( defined $trackLabel ) {
$trackLabel = $path;
$trackLabel =~ s/\//_/g; # get rid of directory separators
}
# create color ranges
my @fg = split (/,/, $fgColor);
my @bg = split (/,/, $bgColor);
# make ( [R,G,B], [R,G,B], ... ) color triplets for each color index
# that interpolate between the foreground and background colors
my $range = max map abs($fg[$_] - $bg[$_]), 0..2;
my @rgb = map {
my $n = $_;
[ map {
$bg[$_] + $n/$range * ( $fg[$_] - $bg[$_] )
} 0..2
]
} 0..$range;
my ( $gff, $maxscore, $minscore, $maxlen ) = read_gff( $path );
# convert GFF scores into color indices, then sort each sequence's GFF
# features by increasing color index
while (my ($seqname, $gffArray) = each %$gff) {
@$gffArray =
sort { $a->[2] <=> $b->[2] }
map [ $_->[0],
$_->[1],
$_->[2] =~ /\d/ ? int( 0.5 + $range * ($_->[2] - $minscore) / ($maxscore - $minscore) )
: $range
],
@$gffArray;
}
# create the renderer
my $renderer = ImageTrackRenderer->new(
"datadir" => $outdir,
"tilewidth" => $tileWidth,
"trackheight" => $trackHeight,
"tracklabel" => $trackLabel,
"key" => $key,
"link" => !$nolinks,
"drawsub" => sub {
my ($im, $seqInfo) = @_;
my $seqname = $seqInfo->{"name"};
my @color;
for my $rgb (@rgb) {
push @color, $im->colorAllocate (@$rgb);
}
$im->setThickness ($thickness);
for my $gff (@{ $gff->{$seqname} || [] }) {
my $start = $im->base_xpos ($gff->[0]) + $im->pixels_per_base / 2;
my $end = $im->base_xpos ($gff->[1]) + $im->pixels_per_base / 2;
my $arcMidX = ($start + $end) / 2;
my $arcWidth = $end - $start;
my $arcHeight = 2 * $trackHeight * ($gff->[1] - $gff->[0]) / $maxlen;
# warn "Drawing arc from $start to $end, height $arcHeight";
$im->arc ($arcMidX, 0, $arcWidth, $arcHeight, 0, 180, $color[$gff->[2]]);
}
});
# run the renderer
$renderer->render;
# all done
exit;
#############################
# load GFF describing basepair locations & intensities; sort by seqname
sub read_gff {
my %gff;
open my $gff, "<", $path or die "$! reading $path";
my ($maxscore, $minscore, $maxlen);
while (my $gffLine = <$gff>) {
next if $gffLine =~ /^\s*\#/;
next unless $gffLine =~ /\S/;
my ($seqname, $source, $feature, $start, $end, $score, $strand, $frame, $group) = split /\t/, $gffLine, 9;
next if grep (!defined(), $seqname, $start, $end, $score);
$gff{$seqname} = [] unless exists $gff{$seqname};
push @{$gff{$seqname}}, [$start, $end, $score];
if ($score =~ /\d/) {
$maxscore = $score if !defined($maxscore) || $score > $maxscore;
$minscore = $score if !defined($minscore) || $score < $minscore;
}
my $len = $end - $start;
$maxlen = $len if !defined($maxlen) || $len > $maxlen;
}
return ( \%gff,$maxscore, $minscore, $maxlen );
}