Skip to content

Commit c012feb

Browse files
committed
Benchmarking
1 parent fd906d6 commit c012feb

31 files changed

+3845
-0
lines changed
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#!/usr/bin/python
2+
import sys
3+
import re
4+
5+
def get_mutations(edge_list, u):
    """Collect the mutations found on the path from vertex ``u`` to the root.

    Every vertex label on the path is split with the pattern ``_|;|^`` and
    each purely numeric piece is added, as an int, to the result.

    Args:
        edge_list: iterable of edges; element 0 of an edge is the parent
            label and element 1 is the child label.
        u: label (string) of the vertex to start from.

    Returns:
        A set of ints gathered from the label of ``u`` and the labels of all
        of its ancestors.  A vertex with no incoming edge ends the walk.
    """
    # Build the child -> parent map once.  setdefault keeps the first parent
    # listed for a child, which is the one a front-to-back scan would pick.
    parent = {}
    for edge in edge_list:
        parent.setdefault(edge[1], edge[0])

    mutations = set()
    visited = set()
    current = u
    # Climb iteratively: a recursive climb runs into the interpreter's
    # recursion limit on deep trees, and the visited set makes a malformed
    # (cyclic) edge list terminate instead of looping forever.
    while current not in visited:
        visited.add(current)
        # NOTE(review): the unescaped "^" is a start-of-string anchor, not a
        # literal caret -- confirm whether labels may contain "^".
        for piece in re.split("_|;|^", current):
            if piece.isdigit():
                mutations.add(int(piece))
        if current not in parent:
            break
        current = parent[current]

    return mutations
21+
22+
def parse_clone_tree(filename_T, filename_l):
    """Read a clone tree and its vertex labeling, and find migration edges.

    Both files hold one record per line; the first two whitespace-separated
    fields of a line are used and blank lines are ignored.

    Args:
        filename_T: path of the edge list file (``<parent> <child>``).
        filename_l: path of the labeling file (``<vertex> <label>``).

    Returns:
        A pair ``(edges, migration_edges)``.  ``edges`` is the list of
        ``(parent, child)`` tuples in file order; ``migration_edges`` is the
        sub-list of edges whose two endpoints carry different labels.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
        KeyError: if an edge endpoint has no entry in the labeling file.
    """
    def read_pairs(filename):
        # split() without an argument tolerates tabs, repeated spaces and a
        # trailing "\r", all of which split(" ") would mis-parse.
        pairs = []
        with open(filename) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue
                pairs.append((fields[0], fields[1]))
        return pairs

    edges = read_pairs(filename_T)
    # A vertex listed more than once keeps its last label.
    labeling = dict(read_pairs(filename_l))

    # An edge is a migration edge when its endpoints are labeled differently.
    migration_edges = [(u, v) for (u, v) in edges if labeling[u] != labeling[v]]

    return edges, migration_edges
42+
43+
def identify_seeding_clones(edge_list, migration_edge_list):
    """Return the distinct mutation sets at the source of each migration edge.

    Args:
        edge_list: all edges of the clone tree, as accepted by
            ``get_mutations``.
        migration_edge_list: iterable of ``(u, v)`` edges; only the source
            vertex ``u`` of each edge is used.

    Returns:
        A set of frozensets, one per distinct result of
        ``get_mutations(edge_list, u)``.
    """
    # Only the source vertex contributes to the result, so the mutation set
    # of the target vertex is not computed.
    return set(frozenset(get_mutations(edge_list, u))
               for (u, _v) in migration_edge_list)
51+
52+
def parse_migration_graph(filename_G):
    """Read the edge list of a migration graph.

    Each non-blank line contributes one edge made of its first two
    whitespace-separated fields.  Repeated lines are kept, so the result can
    describe a multi-graph.

    Args:
        filename_G: path of the edge list file.

    Returns:
        A list of ``(source, target)`` tuples in file order.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """
    edges = []
    with open(filename_G) as f:
        for line in f:
            # split() tolerates tabs, repeated spaces and a trailing "\r".
            fields = line.split()
            if not fields:
                continue
            edges.append((fields[0], fields[1]))

    return edges
60+
61+
def multi_graph_to_set(edge_list):
    """Turn a multi-graph edge list into a set of numbered edges.

    The k-th occurrence of an edge (counting from 1) becomes the triple
    ``(edge[0], edge[1], k)``, so an edge that appears three times yields
    three distinct triples.

    Args:
        edge_list: iterable of hashable edges with at least two elements.

    Returns:
        The set of ``(source, target, occurrence_number)`` triples.
    """
    occurrences = {}
    numbered = set()
    for e in edge_list:
        k = occurrences.get(e, 0) + 1
        occurrences[e] = k
        numbered.add((e[0], e[1], k))
    return numbered
71+
72+
def _ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 if denominator is 0."""
    if denominator == 0:
        return 0.0
    return float(numerator) / float(denominator)


def _f_score(recall, precision):
    """Return the harmonic mean of recall and precision, 0.0 if either is 0."""
    if recall == 0 or precision == 0:
        return 0.0
    return 2.0 / ((1.0 / recall) + (1.0 / precision))


def _recall_precision_f(inferred, simulated):
    """Return [recall, precision, F] of the set ``inferred`` against ``simulated``."""
    shared = len(inferred & simulated)
    recall = _ratio(shared, len(simulated))
    precision = _ratio(shared, len(inferred))
    return [recall, precision, _f_score(recall, precision)]


if __name__ == "__main__":
    # Compare an inferred migration history with the simulated one and print
    # a single CSV line:
    #   recall, precision, F   for the seeding clones,
    #   recall, precision, F   for the migration graph edges,
    #   recall, precision, F   for the migration multi-graph edges.
    if len(sys.argv) != 7:
        sys.stderr.write("Usage: %s <SIMULATED_CLONE_TREE> <SIMULATED_VERTEX_LABELING> <SIMULATED_MIGRATION_GRAPH>"
                         " <INFERRED_CLONE_TREE> <INFERRED_VERTEX_LABELING> <INFERRED_MIGRATION_GRAPH>\n" % sys.argv[0])
        sys.exit(1)

    edges_simulated, mig_edges_simulated = parse_clone_tree(sys.argv[1], sys.argv[2])
    seeding_clones_simulated = identify_seeding_clones(edges_simulated, mig_edges_simulated)

    edges_inferred, mig_edges_inferred = parse_clone_tree(sys.argv[4], sys.argv[5])
    seeding_clones_inferred = identify_seeding_clones(edges_inferred, mig_edges_inferred)

    # Parse each migration graph once and derive both views from it.
    graph_simulated = parse_migration_graph(sys.argv[3])
    graph_inferred = parse_migration_graph(sys.argv[6])

    # A ratio with an empty denominator, and an F score with a zero
    # component, are reported as 0.0 rather than raising ZeroDivisionError.
    scores = _recall_precision_f(seeding_clones_inferred, seeding_clones_simulated)
    scores += _recall_precision_f(set(graph_inferred), set(graph_simulated))
    scores += _recall_precision_f(multi_graph_to_set(graph_inferred),
                                  multi_graph_to_set(graph_simulated))

    # A parenthesised single argument prints identically on Python 2 and 3.
    print(",".join(map(str, scores)))
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/usr/bin/python
2+
import sys
3+
4+
def _minimum_score_trees(lines):
    """Select the lowest-scoring trees whose pattern ends in ``R``.

    Each line is tab-separated: field 0 is the tree index (trailing ``-``
    characters are stripped), field 1 a parenthesised, ``", "``-separated
    pattern list of which the last entry is used, fields 2-4 the three
    integer score components and field 5 an extra column carried through to
    the result.  Lines whose field 2 is ``-`` carry no score and are skipped,
    as are blank lines.

    Args:
        lines: iterable of text lines.

    Returns:
        A pair ``(best_score, trees)``: the smallest score tuple (compared
        lexicographically) or ``None`` when no line qualified, and the set of
        ``(index, pattern, field5)`` tuples that reach that score.
    """
    best_score = None
    trees = set()
    for line in lines:
        if not line.strip():
            continue
        s = line.rstrip("\n").split("\t")
        idx = int(s[0].rstrip("-"))
        if s[2] == '-':
            continue

        score = (int(s[2]), int(s[3]), int(s[4]))
        pattern = s[1].lstrip("(").rstrip(")").split(", ")[-1]
        if pattern != "R":
            continue

        # None as the initial value means arbitrarily large scores are still
        # considered; a fixed numeric sentinel would silently drop them.
        if best_score is None or score < best_score:
            best_score = score
            trees = set()
        if score == best_score:
            trees.add((idx, pattern, s[5]))

    return best_score, trees


if __name__ == "__main__":
    if len(sys.argv) != 2:
        sys.stderr.write("Usage: %s <MACHINA_PMH_CTI_RESULT>\n" % sys.argv[0])
        sys.exit(1)

    with open(sys.argv[1]) as f:
        min_score, trees = _minimum_score_trees(f)

    # Sorted so the output order is reproducible between runs.
    for t in sorted(trees):
        print(",".join(map(str, [t[0], t[1], t[2]] + list(min_score))))
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
# Emit one CSV row per tree reported by extract_minimum.py for every
# <RESULT_DIR>/*.txt file: the identifying columns, the value taken from the
# output of <rf_executable>, and the scores printed by
# evaluate_mig_history.py.
#
# Usage: $0 <RESULT_DIR> <rf_executable> <m>
if [ $# -ne 3 ]
then
    echo "Usage: $0 <RESULT_DIR> <rf_executable> <m>" >&2
    exit 1
fi
m=$3

echo "pattern,seed,mut_tree,enforced,inferred,mu,gamma,sigma,method,RF,recallT,precisionT,FscoreT,recallG,precisionG,FscoreG,recallMultiG,precisionMultiG,FscoreMultiG"
for f in "$1"/*.txt
do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$f" ] || continue

    # File names look like <p>_<s>.txt: p is the text before the first
    # underscore, s the text after the last one.
    d=$(basename "$f" .txt)
    p=${d%%_*}
    s=${d##*_}
    sim="../../../data/sims/$m/$p"

    for t in $(python extract_minimum.py "$f")
    do
        pattern=$(echo "$t" | cut -d',' -f2)
        idx=$(echo "$t" | cut -d',' -f1)
        inferred_T="$1/$d/${idx}-T-P-${pattern}"
        inferred_G="$1/$d/${idx}-G-P-${pattern}"

        # RF is the third space-separated field of the last output line.
        # $2 is deliberately unquoted so a command with arguments still works.
        rf=$(echo -n $($2 "$sim/T_seed$s.tree" "$sim/T_seed$s.labeling" "$inferred_T.tree" "$inferred_T.labeling" | tail -n 1 | cut -d' ' -f3))

        # Every pattern except "mS" is printed with a "p" prefix.
        if [ "$p" = "mS" ]
        then
            echo -n "$p,"
        else
            echo -n "p$p,"
        fi

        echo -n "$s,$t,MACHINA,$rf,"
        python evaluate_mig_history.py "$sim/T_seed${s}.tree" "$sim/T_seed${s}.vertex.labeling" "$sim/G_seed${s}.tree" "$inferred_T.tree" "$inferred_T.labeling" "$inferred_G.tree"
    done
done
33+

0 commit comments

Comments
 (0)