-
Notifications
You must be signed in to change notification settings - Fork 0
/
excludeSamples.awk
executable file
·56 lines (56 loc) · 1.43 KB
/
excludeSamples.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/awk -f
#This script computes the set difference of two sample lists: it prints
# the lines of the second input file whose sample IDs do not appear in
# the first input file (the exclude list).
#I've also added the capacity to handle a more complex second file
# consisting of a header line and multiple columns.
#The header is retained and used to identify the sample ID column,
# and sample lines are retained if their ID is not in the exclude list.
#Set up tab-delimited I/O and fill in defaults for the user-settable
# variables. Values passed with -v are visible here; unset ones are empty.
#  header:        >0 if the second file starts with a header line (default 0)
#  samplecolname: header name of the sample ID column (optional)
#  samplecol:     index of the sample ID column (default 1)
#  negate:        >0 to keep only the samples in the exclude list (default 0)
BEGIN{
   FS="\t";
   OFS=FS;
   if (length(header) == 0) {
      header=0;
   };
   #Default to the first column whenever no index was supplied, even if
   # samplecolname is set: the name can only be resolved from a header
   # line, and an unset samplecol would make $samplecol refer to $0
   # (the whole line) if the name is never found or header is 0.
   if (length(samplecol) == 0) {
      samplecol=1;
   };
   if (length(negate) == 0) {
      negate=0;
   };
   #Incremented at the first line of each input file that has any lines.
   filenum=0;
}
#Count input files: FNR returns to 1 at the first line of each file.
#NOTE: an empty file yields no records, so it is never counted here; an
# empty first file would make the second file be read as the exclude list.
FNR==1{
   filenum++;
   #Auto-detect space-separation vs. tab-separation in the second file:
   # if tab-splitting gave a single field that itself contains spaces,
   # switch to awk's default whitespace splitting.
   #This approach has a bug if the second file has a single column
   # that contains spaces.
   if (filenum==2) {
      test_n_cols=split($1, test_cols, " ");
      if (NF == 1 && test_n_cols > 1) {
         FS=" ";
         #A new FS only applies to records read after this one, so
         # re-split the current line (header or first sample) explicitly;
         # otherwise it would still be seen as a single field.
         $0=$0;
      };
   };
}
#First file: record the first field of every line as a key of the exclude
# list. Only key membership is tested later; the stored value is unused.
filenum == 1 {
   exclude[$1] = 1;
   #No later rule applies to lines of the first file, so skip ahead.
   next;
}
#Second file: pass the header line through (resolving the sample ID column
# from it when samplecolname is set) and filter the remaining lines
# against the exclude list.
filenum==2{
   if (FNR==1 && header > 0) {
      if (length(samplecolname) > 0) {
         name_found=0;
         for (i=1; i<=NF; i++) {
            #No early exit: if the name occurs more than once, the last
            # matching column wins.
            if ($i == samplecolname) {
               samplecol=i;
               name_found=1;
            };
         };
         if (!name_found) {
            #An unset samplecol would make $samplecol mean $0, so the filter
            # would silently compare whole lines against the exclude list.
            if (length(samplecol) == 0) {
               samplecol=1;
            };
            print "excludeSamples.awk: column \"" samplecolname "\" not found in header, using column " samplecol > "/dev/stderr";
         };
      };
      #The header line is always retained in the output.
      print;
   } else {
      is_listed=($samplecol in exclude);
      #negate > 0 keeps only listed samples; otherwise only unlisted ones.
      if ((negate > 0) ? is_listed : !is_listed) {
         print;
      };
   };
}