Skip to content

Commit 740ac6e

Browse files
author
Roderick Bovee
committed
Speed up lowest_common_ancestor by splitting lookup into doublets
1 parent 5358370 commit 740ac6e

File tree

1 file changed

+14
-4
lines changed

1 file changed

+14
-4
lines changed

taxonomy/core.py

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -117,15 +117,25 @@ def parents(self, tax_id):
117117
return list(networkx.dfs_preorder_nodes(self.tax_graph, tax_id))[1:]
118118

119119
def lowest_common_ancestor(self, *tax_ids):
120+
len_tax_ids = len(tax_ids)
121+
if len_tax_ids == 0:
122+
return None
123+
elif len_tax_ids == 1:
124+
return tax_ids[0]
125+
120126
if not all(self.tax_graph.has_node(tax_id) for tax_id in tax_ids):
121127
raise KeyError('Tax IDs {} not in taxonomy'.format(','.join(str(t) for t in tax_ids)))
122128

123-
parent_gens = [networkx.dfs_postorder_nodes(self.tax_graph, tax_id) for tax_id in tax_ids]
129+
return reduce(self._lowest_common_ancestor_double, tax_ids)
130+
131+
def _lowest_common_ancestor_double(self, tax_id_1, tax_id_2):
132+
parent_gen_1 = networkx.dfs_postorder_nodes(self.tax_graph, tax_id_1)
133+
parent_gen_2 = networkx.dfs_postorder_nodes(self.tax_graph, tax_id_2)
124134
parent_id = None
125-
for tax_ids in itertools.izip(*parent_gens):
126-
if tax_ids.count(tax_ids[0]) != len(tax_ids):
135+
for ptax_id_1, ptax_id_2 in itertools.izip(parent_gen_1, parent_gen_2):
136+
if ptax_id_1 != ptax_id_2:
127137
break
128-
parent_id = tax_ids[0]
138+
parent_id = ptax_id_1
129139

130140
# the tax_ids don't share a common parent; return None
131141
return parent_id

0 commit comments

Comments
 (0)