|
13 | 13 | import os
|
14 | 14 | import re
|
15 | 15 | import pprint
|
| 16 | +from difflib import get_close_matches |
16 | 17 |
|
17 | 18 | import sourmash
|
18 | 19 | import sourmash_utils
|
@@ -222,23 +223,27 @@ def pangenome_createdb_main(args):
|
222 | 223 | ident = tax_utils.get_ident(name)
|
223 | 224 |
|
224 | 225 | # grab relevant lineage name
|
225 |
| - try: |
226 |
| - lineage_tup = taxdb[ident] |
227 |
| - except KeyError: # older versions of genbank are not named with version! |
228 |
| - try: |
229 |
| - if "." in ident: |
230 |
| - short_ident = ident.split(".")[0] |
231 |
| - lineage_tup = taxdb[short_ident] |
232 |
| - else: |
233 |
| - for i in range(1, 10): |
234 |
| - try: |
235 |
| - new_ident = f"{ident}.{i}" |
236 |
| - lineage_tup = taxdb[new_ident] |
237 |
| - break |
238 |
| - except KeyError: |
239 |
| - continue |
240 |
| - except: |
241 |
| - print("Wow, that sucks!") |
| 226 | + lineage_tup = taxdb.get(ident) |
| 227 | + |
| 228 | + # not found and has a .? maybe we can strip off the version. |
| 229 | + if lineage_tup is None and "." in ident: |
| 230 | + short_ident = ident.split(".")[0] |
| 231 | + lineage_tup = taxdb.get(short_ident) |
| 232 | + |
| 233 | + # not found and has no .? Try many versions. |
| 234 | + if lineage_tup is None and "." not in ident: |
| 235 | + for i in range(1, 10): |
| 236 | + new_ident = f"{ident}.{i}" |
| 237 | + lineage_tup = taxdb.get(new_ident) |
| 238 | + if lineage_tup is not None: |
| 239 | + break |
| 240 | + |
| 241 | + if lineage_tup is None: |
| 242 | + print(f"cannot find ident {ident} in the provided taxonomy file.") |
| 243 | + print(f"The three closest matches to {ident} are:") |
| 244 | + for k in get_close_matches(ident, taxdb): |
| 245 | + print(f"* '{k}'") |
| 246 | + sys.exit(-1) |
242 | 247 |
|
243 | 248 | lineage_tup = tax_utils.RankLineageInfo(lineage=lineage_tup)
|
244 | 249 | lineage_pair = lineage_tup.lineage_at_rank(args.rank)
|
|
0 commit comments