-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtaxon_table.R
72 lines (57 loc) · 2.1 KB
/
taxon_table.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/Rscript
#merge taxons of otu table without normalization.
#TABLE="otu_table_example.tsv"; INDEX2=29; RANK=5; MINab=750; OUT1="test.tsv"
#INPUT
#Tabular formatted OTU table as "otu_table_example.tsv"
#
#args
# 1: input OTU table tabular separated with "taxonomy" column in the last column, taxonomy separated with semicolon
# 2: index of the last sample column
# 3: taxonomic rank to output, i.e. 5 for family 6 for genus
# 4: min total abundance to discard low abundance sample
# 5: output name
args=commandArgs(trailingOnly=TRUE)
TABLE=as.character(args[1])
INDEX2=as.numeric(args[2])
RANK=as.numeric(args[3])
MINab=as.numeric(args[4])
OUT1=as.character(args[5])
wd1=Sys.getenv('PWD')
if(length(args)<5){print("USAGE: taxon_table.R input_table.tsv[chr] index_last_sample[num] rank[num] min_abundance[num] name_outpu_table[chr]")
cat("\n\nINPUT\n#Tabular formatted OTU table as otu_table_example.tsv
args
1: input OTU table tabular separated with taxonomy column in the last column, taxonomy separated with semicolon
2: index of the last sample column
3: taxonomic rank to output, i.e. 5 for family 6 for genus
4: min total abundance to discard low abundance sample
5: output name \n")}else{
#setwd("/home/pri/Desktop/farm2cheese/DATAS/00_contigs/00_fishflow/phyloseq")
A=read.table(TABLE, sep="\t", h=TRUE)
Asample=A[,2:INDEX2] #84
rownames(Asample)=A[,1]
#Taxonomy extraction
nsplt=strsplit(as.character(A$taxonomy), "[;]")
namesTAB=as.character(do.call(rbind.data.frame, nsplt)[,RANK])
MINeff=min(apply(Asample,1,sum))
#Transpose
B1=as.data.frame(Asample)
#Sum multiple occurence of the same taxon
stock1=NULL
for (i in 1:dim(B1)[2]){
#print(names(B1)[i])
res=tapply(B1[,i],namesTAB,sum)
stock1=cbind(stock1, res)
}
B=as.data.frame(stock1)
colnames(B)=colnames(B1)
#Discard sample with low abundance
sum1=apply(B, 2, sum)
C=B[,sum1>=MINab]
if(ncol(B)==ncol(C)){
print("No sample discarded")} else {
print("Sample discarded:");print(names(B[,sum1<MINab]))
}
D=cbind.data.frame(taxon=row.names(C),C)
#Output table
write.table(D, OUT1, sep="\t", quote=FALSE, row.names=FALSE)
}