|
| 1 | +#!/usr/bin/python |
| 2 | +import sys |
| 3 | +import re |
| 4 | + |
def get_mutations(edge_list, u):
    """Collect the mutations of vertex ``u`` and of all of its ancestors.

    A vertex label is split on the delimiters ``_``, ``;`` and ``^``; every
    piece that consists solely of digits is taken as a mutation index.  The
    walk then moves to the parent of the current vertex (an edge ``(p, c)``
    makes ``p`` the parent of ``c``) and repeats until a vertex without an
    incoming edge is reached.

    Args:
        edge_list: iterable of edges; element 0 of an edge is the parent
            label and element 1 is the child label.
        u: label (string) of the vertex to start from.

    Returns:
        A set of ints: the mutation indices found in the labels of ``u`` and
        of every vertex on the path from ``u`` up to the root.
    """
    # First edge wins when a vertex has several incoming edges, which is the
    # edge a front-to-back scan of edge_list would pick.
    parent_of = {}
    for edge in edge_list:
        parent_of.setdefault(edge[1], edge[0])

    mutations = set()
    visited = set()
    node = u
    # Iterative walk: no recursion-depth limit on deep trees, and the
    # visited set guarantees termination even if the input contains a cycle.
    while node is not None and node not in visited:
        visited.add(node)
        # Inside a character class (and not in first position) '^' is a
        # literal caret; as a bare alternative it would be the start anchor
        # and never act as a delimiter.
        for token in re.split(r"[_;^]", node):
            if token.isdigit():
                mutations.add(int(token))
        node = parent_of.get(node)

    return mutations
| 21 | + |
def parse_clone_tree(filename_T, filename_l):
    """Read a clone tree and its vertex labeling and find the migration edges.

    Both files are plain text with one record per line and whitespace
    separated fields.  In the tree file the first two fields of a line are
    the two endpoints of an edge; in the labeling file the first field is a
    vertex and the second field is its label.  Blank lines are ignored.

    Args:
        filename_T: path of the edge-list file of the clone tree.
        filename_l: path of the vertex labeling file.

    Returns:
        A pair ``(edges, migration_edges)``.  ``edges`` is the list of
        ``(u, v)`` tuples in file order; ``migration_edges`` is the sub-list
        of those edges whose two endpoints carry different labels.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
        KeyError: if an edge endpoint does not occur in the labeling file.
    """
    edges = []
    with open(filename_T) as f:
        for line in f:
            # split() without an argument tolerates tabs, repeated blanks
            # and stray '\r', and yields [] for a blank line.
            fields = line.split()
            if not fields:
                continue
            edges.append((fields[0], fields[1]))

    labeling = {}
    with open(filename_l) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            labeling[fields[0]] = fields[1]

    # A migration edge connects two vertices with different labels.
    migration_edges = [(u, v) for (u, v) in edges if labeling[u] != labeling[v]]

    return edges, migration_edges
| 42 | + |
def identify_seeding_clones(edge_list, migration_edge_list):
    """Return the distinct mutation sets of the sources of migration edges.

    For every migration edge ``(u, v)`` the mutations of ``u`` and its
    ancestors are collected with ``get_mutations``; only the source vertex
    ``u`` contributes, the target ``v`` is not inspected.

    Args:
        edge_list: all edges of the clone tree, as ``(parent, child)`` pairs.
        migration_edge_list: the ``(u, v)`` edges regarded as migrations.

    Returns:
        A set of frozensets of ints, one frozenset per distinct mutation set
        found at the source of a migration edge.
    """
    res = set()
    for (u, _v) in migration_edge_list:
        # frozenset so the mutation set is hashable and duplicates collapse.
        res.add(frozenset(get_mutations(edge_list, u)))

    return res
| 51 | + |
def parse_migration_graph(filename_G):
    """Read the edge list of a migration graph from a text file.

    Each non-blank line contributes one edge made of its first two
    whitespace separated fields.  Repeated lines are kept, so the result can
    describe a multigraph.  Blank lines are ignored.

    Args:
        filename_G: path of the migration graph file.

    Returns:
        List of ``(source, target)`` tuples in file order.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """
    edges = []
    with open(filename_G) as f:
        for line in f:
            # split() without an argument tolerates tabs, repeated blanks
            # and stray '\r', and yields [] for a blank line.
            fields = line.split()
            if not fields:
                continue
            edges.append((fields[0], fields[1]))

    return edges
| 60 | + |
def multi_graph_to_set(edge_list):
    """Encode a multigraph edge list as a set of numbered edges.

    The k-th occurrence of an edge in ``edge_list`` is turned into the
    triple ``(edge[0], edge[1], k)``, so an edge that appears three times
    contributes the triples numbered 1, 2 and 3.

    Args:
        edge_list: iterable of hashable edges with at least two elements.

    Returns:
        Set of ``(source, target, occurrence_number)`` triples.
    """
    occurrences = {}
    numbered = set()
    for e in edge_list:
        # Running count of how often this exact edge has been seen so far.
        k = occurrences.get(e, 0) + 1
        occurrences[e] = k
        numbered.add((e[0], e[1], k))
    return numbered
| 71 | + |
def precision_recall_f(inferred, simulated):
    """Score an inferred set against a simulated (reference) set.

    Args:
        inferred: set of inferred items.
        simulated: set of reference items.

    Returns:
        Tuple ``(recall, precision, F)`` of floats, where recall is
        ``|inferred & simulated| / |simulated|``, precision is
        ``|inferred & simulated| / |inferred|`` and F is their harmonic
        mean.  A score whose denominator would be zero (empty set, or no
        common item for F) is reported as 0.0 instead of raising
        ZeroDivisionError.
    """
    common = float(len(inferred & simulated))
    recall = common / float(len(simulated)) if simulated else 0.0
    precision = common / float(len(inferred)) if inferred else 0.0
    if recall > 0.0 and precision > 0.0:
        f_score = 2.0 / ((1.0 / recall) + (1.0 / precision))
    else:
        f_score = 0.0
    return recall, precision, f_score


if __name__ == "__main__":
    if len(sys.argv) != 7:
        sys.stderr.write("Usage: %s <SIMULATED_CLONE_TREE> <SIMULATED_VERTEX_LABELING> <SIMULATED_MIGRATION_GRAPH>"
                         " <INFERRED_CLONE_TREE> <INFERRED_VERTEX_LABELING> <INFERRED_MIGRATION_GRAPH>\n" % sys.argv[0])
        sys.exit(1)

    # Seeding clones: compared as sets of mutation sets.
    edges_simulated, mig_edges_simulated = parse_clone_tree(sys.argv[1], sys.argv[2])
    seeding_clones_simulated = identify_seeding_clones(edges_simulated, mig_edges_simulated)

    edges_inferred, mig_edges_inferred = parse_clone_tree(sys.argv[4], sys.argv[5])
    seeding_clones_inferred = identify_seeding_clones(edges_inferred, mig_edges_inferred)

    recall, precision, F = precision_recall_f(seeding_clones_inferred, seeding_clones_simulated)

    # Each migration graph file is read once and reused for both comparisons.
    graph_simulated = parse_migration_graph(sys.argv[3])
    graph_inferred = parse_migration_graph(sys.argv[6])

    # Migration graph as a simple graph: edge multiplicities are ignored.
    recall_G, precision_G, F_G = precision_recall_f(set(graph_inferred), set(graph_simulated))

    # Migration graph as a multigraph: the k-th copy of an edge is a distinct item.
    recall_G2, precision_G2, F_G2 = precision_recall_f(multi_graph_to_set(graph_inferred),
                                                       multi_graph_to_set(graph_simulated))

    # Single-argument print(...) is valid in both Python 2 and Python 3.
    print(",".join(map(str, [recall, precision, F, recall_G, precision_G, F_G, recall_G2, precision_G2, F_G2])))
0 commit comments