-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathtreepl_wrapper.sh
103 lines (76 loc) · 2.42 KB
/
treepl_wrapper.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/bin/bash
#version: 0.9
#Tongjian Liu, molevoliutongjian@gmail.com
#https://github.com/tongjial/treepl_wrapper
# check the arguments
# Exactly three positional arguments are required: the configuration file,
# the tree file, and a label that is embedded in the names of the generated
# files. Diagnostics are written to stderr so they never mix with regular
# output; exit status 1 signals incorrect usage.
if [[ $# -ne 3 ]]; then
  echo -e "Error: incorrect usage" >&2
  echo -e "usage:\ntreepl_wrapper.sh configuration treefile label" >&2
  exit 1
fi
# check the configure and tree file
# Both inputs must be existing regular files. The arguments are quoted inside
# [[ ]] so that an empty argument or a path containing whitespace is tested
# as-is (unquoted, an empty argument collapses to `[ -f ]`, which is true).
# On failure the message goes to stderr and the script exits with status 2.
if [[ -f "$1" && -f "$2" ]]; then
  echo -e "================================================================"
  date
  echo -e "start running, good luck!"
  echo -e "================================================================"
else
  echo -e "Error: can't find the files" >&2
  echo -e "usage:\ntreepl_wrapper.sh configuration treefile label" >&2
  exit 2
fi
# Log the invocation. printf '%s' prints the arguments verbatim, whereas
# `echo -e` would interpret backslash sequences contained in them.
printf 'treepl_wrapper.sh %s %s %s\n' "$1" "$2" "$3"
# generate the prime configure file
# Layout of configure_prime_<label>:
#   treefile = <treefile>                      (prepended)
#   <lines of the user's configuration file>   (copied unchanged)
#   thorough                                   (appended)
#   prime                                      (appended)
# The tree file name is emitted with printf instead of being spliced into an
# awk program, so names with spaces, quotes or backslashes are copied verbatim.
{
  printf 'treefile = %s\n' "$2"
  # awk '{print}' terminates every copied line with a newline, so "thorough"
  # starts on its own line even if the configuration lacks a final newline.
  awk '{print}' < "$1"
  printf 'thorough\nprime\n'
} > "configure_prime_$3"
# run primes
# treePL is run 100 times on the prime configuration. From each run, the
# output from the first line starting with "opt" through the end is kept,
# every kept line is reduced to its last character, and those characters are
# joined with single spaces into one summary line. prime_<label> receives at
# most one such line per run.

# Stop early if the treePL executable is missing; otherwise all 100 runs
# would fail and the later steps would work on empty data.
if ! command -v treePL > /dev/null 2>&1; then
  echo -e "Error: can't find treePL in PATH" >&2
  exit 127
fi

# The whole loop is redirected with ">" so that lines left behind by an
# earlier run with the same label are discarded instead of being counted
# again when the most frequent result is selected.
for ((run = 1; run <= 100; run++)); do
  treePL "configure_prime_$3" |
    awk '
      /^opt/ { keep = 1 }
      keep   { line = line sep substr($0, length($0)); sep = " " }
      END    { if (keep) print line }
    '
done > "prime_$3"
# generate the cv configure file
# configure_cv_<label> is the prime configuration with every line that starts
# with "prime" commented out, followed by the cross-validation settings:
#   cv, cvoutfile = cv_<label>, cvstart = 0.0001, cvstop = 10000
# The label is written with printf rather than pasted into an awk program, so
# it cannot break or alter that program.
{
  # In sub(), "&" in the replacement stands for the matched text: "#prime".
  awk '{ sub(/^prime/, "#&"); print }' < "configure_prime_$3"
  printf 'cv\ncvoutfile = cv_%s\ncvstart = 0.0001\ncvstop = 10000\n' "$3"
} > "configure_cv_$3"
# chose the most frequent cv optimal parameters
# 1. Count identical lines of prime_<label> and keep the most frequent one
#    (format: "<count> <field> <field> ...").
# 2. Normalise it to seven fields: <count> <v1> <l|o> <v2> <d|o> <v3> <d|o>.
#    Wherever the expected "l"/"d" marker is absent, the placeholder "o" is
#    inserted. `$0 = $0` makes awk re-split the record after each insertion,
#    so the following test sees the shifted field positions.
# 3. Put one field per line, drop the count, turn each placeholder into "#"
#    and expand the remaining lines into configuration entries:
#      opt = <v1> / moredetail / optad = <v2> / moredetailad /
#      optcvad = <v3> / moredetailcvad
#    The sed addresses are 2..7 because line 1 (the count) is deleted inside
#    the same sed invocation.
# The result is appended to configure_cv_<label>.
sort "prime_$3" | uniq -c | sort -nr | head -n 1 | sed 's/^[[:space:]]*//' |
  awk '{
    if ($3 != "l") { $2 = $2 " o"; $0 = $0 }
    if ($5 != "d") { $4 = $4 " o"; $0 = $0 }
    if ($7 != "d") { $6 = $6 " o" }
    print
  }' |
  tr ' ' '\n' |
  sed -e '1d' \
      -e 's/o/#/g' \
      -e '2s/^/opt = /' \
      -e '3s/l/moredetail/' \
      -e '4s/^/optad = /' \
      -e '5s/d/moredetailad/' \
      -e '6s/^/optcvad = /' \
      -e '7s/d/moredetailcvad/' >> "configure_cv_$3"
# perform cross validation
treePL "configure_cv_$3"
# generate the smooth configure file
# configure_smooth_<label> is the cv configuration with every line that
# starts with "cv" commented out (this covers cv, cvoutfile, cvstart and
# cvstop), followed by the output file setting treepl_<label>.tre.
# The label is written with printf rather than pasted into an awk program, so
# it cannot break or alter that program.
{
  # In sub(), "&" in the replacement stands for the matched text: "#cv".
  awk '{ sub(/^cv/, "#&"); print }' < "configure_cv_$3"
  printf 'outfile = treepl_%s.tre\n' "$3"
} > "configure_smooth_$3"
# find the smallest cv score
# Each line of cv_<label> is rewritten as "<field 3>\t<field 2>" with field 3
# formatted as a fixed-point number, the lines are sorted numerically, and
# the first (smallest) one is kept. Its second column, with any parentheses
# removed, is appended to configure_smooth_<label> as "smoothing = <value>".
# LC_ALL=C pins number formatting and numeric sorting to the "." decimal
# point so the result does not depend on the user's locale.
LC_ALL=C awk '{ printf "%f\t%s\n", $3, $2 }' < "cv_$3" |
  LC_ALL=C sort -n |
  head -n 1 |
  awk '{ gsub(/[()]/, "", $2); print "smoothing = " $2 }' >> "configure_smooth_$3"
# the last step
# Run treePL on the smoothing configuration. The file name is quoted so a
# label containing whitespace or glob characters is passed as one argument.
treePL "configure_smooth_$3"
# clean temporary files
# Cleanup is disabled (commented out); uncomment the next line to remove the
# intermediate files once they are no longer needed.
# rm -- "configure_cv_$3" "configure_prime_$3" "configure_smooth_$3" "cv_$3" out_dates.tre out_dates.tre.r8s "prime_$3"
echo -e "================================================================"
echo -e "running finished, please check the result and report the bugs"
date
echo -e "================================================================"
exit 0