-
Notifications
You must be signed in to change notification settings - Fork 4
/
summarizeStat.awk
executable file
·50 lines (50 loc) · 1.17 KB
/
summarizeStat.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/bin/awk -f
BEGIN {
    # Records are read and written as tab-separated columns.
    FS = OFS = "\t";

    # genomewide: when unset or empty, default to 0, which makes the END rule
    # print one line per scaffold instead of a single genome-wide line.
    if (genomewide == "") {
        genomewide = 0;
    }

    # weighted: when unset or empty, default to 0, in which case column 4
    # (if present) is read as an omission flag rather than as a weight.
    if (weighted == "") {
        weighted = 0;
    }
}
# Per-record accumulation.
#   $1 = scaffold name (key of the stat[] and count[] arrays)
#   $3 = per-site statistic
#   $4 = optional: omission flag when weighted == 0, weight otherwise
# stat[scaffold] holds the (weighted) sum of the statistic and
# count[scaffold] holds the number of sites (or sum of weights), so that
# the END rule can report stat/count as the mean.
{
    # A record without a statistic column (e.g. a blank line) carries no
    # data; a missing $4 compares equal to 0, so without this guard such a
    # record would be counted as a site with value 0.
    if (NF < 3) {
        next;
    }

    if (weighted != 0) {
        # Any non-zero value of weighted enables weighting, mirroring how
        # genomewide is interpreted.
        if (NF > 3) {
            stat[$1] += $3 * $4;
            count[$1] += $4;
        } else {
            # Weighting requested but no weight column: treat weight as 1.
            stat[$1] += $3;
            count[$1] += 1;
        }
    } else if (NF == 3 || $4 == 0) {
        # Unweighted: keep the site only if there is no omission column or
        # the omission column is 0.
        stat[$1] += $3;
        count[$1] += 1;
    }
}
# Report the mean statistic (stat/count), or "NA" when the count is not
# positive. Output fields are joined with OFS.
END {
    if (genomewide == 0) {
        # Per-scaffold mode: one line per scaffold seen in stat[].
        for (name in stat) {
            mean = "NA";
            if (count[name] > 0) {
                mean = stat[name] / count[name];
            }
            print name, mean;
        }
    } else {
        # Genome-wide mode: pool sums and counts across all scaffolds
        # before dividing, then print a single line.
        for (name in stat) {
            stat_gw += stat[name];
            count_gw += count[name];
        }
        mean = "NA";
        if (count_gw > 0) {
            mean = stat_gw / count_gw;
        }
        print "Genome-wide", mean;
    }
}