Skip to content

Commit

Permalink
Merge pull request #1439 from tomkinsc/ct-bugfix-beast-import
Browse files Browse the repository at this point in the history
bugfix tree file opening in beast import
  • Loading branch information
joverlee521 authored Mar 15, 2024
2 parents a9572d5 + 1138c65 commit 4dd23c5
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 39 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@

* filter: Updated docs with an example of tiered subsampling. [#1425][] (@victorlin)
* export: Fixes bug [#1433][], introduced in v23.1.0, that causes validation to fail when gene names start with `nuc`, e.g. `nucleocapsid`. [#1434][] (@corneliusroemer)
* import: Fixes bug introduced in v24.2.0 that prevented `import beast` from running. [#1439][] (@tomkinsc)

[#1425]: https://github.com/nextstrain/augur/pull/1425
[#1429]: https://github.com/nextstrain/augur/pull/1429
[#1433]: https://github.com/nextstrain/augur/issues/1433
[#1434]: https://github.com/nextstrain/augur/pull/1434
[#1436]: https://github.com/nextstrain/augur/pull/1436
[#1439]: https://github.com/nextstrain/augur/pull/1439

## 24.2.3 (23 February 2024)

Expand Down
70 changes: 31 additions & 39 deletions augur/import_/beast.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,45 +233,37 @@ def parse_nexus(tree_path, treestring_regex=r'tree [A-Za-z\_]+([0-9]+)', verbose
tipNum=0
tree=None

if isinstance(tree_path,str): ## determine if path or handle was provided to function
try:
handle=open_file(tree_path,'r')
except FileNotFoundError:
print("FATAL: No such file {}".format(tree_path))
sys.exit(2)
else:
handle=tree_path

for line in handle: ## iterate over lines
l=line.strip('\n')

nTaxa=re.search(r'dimensions ntax=([0-9]+);',l.lower()) ## get number of tips that should be in tree
if nTaxa is not None:
tipNum=int(nTaxa.group(1))
if verbose:
print('File should contain %d taxa'%(tipNum))

treeString=re.search(treestring_regex,l) ## search for line with the tree
if treeString is not None:
treeString_start=l.index('(') ## find index of where tree string starts
tree=parse_beast_tree(l[treeString_start:], tipMap=tips, verbose=verbose) ## parse tree string

if verbose:
print('Identified tree string')

if tipFlag==True: ## going through tip encoding block
tipEncoding=re.search(r'([0-9]+) ([A-Za-z\-\_\/\.\'0-9 \|?]+)',l) ## search for key:value pairs
if tipEncoding is not None:
tips[tipEncoding.group(1)]=tipEncoding.group(2).strip('"').strip("'") ## add to tips dict
if verbose==True:
print('Identified tip translation %s: %s'%(tipEncoding.group(1),tips[tipEncoding.group(1)]))
elif ';' not in l:
print('tip not captured by regex:',l.replace('\t',''))

if 'translate' in l.lower(): ## tip encoding starts on next line
tipFlag=True
if ';' in l:
tipFlag=False
with open_file(tree_path,'r') as handle: ## open tree_path as file, or consume directly if already a file handle
for line in handle: ## iterate over lines
l=line.strip('\n')

nTaxa=re.search(r'dimensions ntax=([0-9]+);',l.lower()) ## get number of tips that should be in tree
if nTaxa is not None:
tipNum=int(nTaxa.group(1))
if verbose:
print('File should contain %d taxa'%(tipNum))

treeString=re.search(treestring_regex,l) ## search for line with the tree
if treeString is not None:
treeString_start=l.index('(') ## find index of where tree string starts
tree=parse_beast_tree(l[treeString_start:], tipMap=tips, verbose=verbose) ## parse tree string

if verbose:
print('Identified tree string')

if tipFlag==True: ## going through tip encoding block
tipEncoding=re.search(r'([0-9]+) ([A-Za-z\-\_\/\.\'0-9 \|?]+)',l) ## search for key:value pairs
if tipEncoding is not None:
tips[tipEncoding.group(1)]=tipEncoding.group(2).strip('"').strip("'") ## add to tips dict
if verbose==True:
print('Identified tip translation %s: %s'%(tipEncoding.group(1),tips[tipEncoding.group(1)]))
elif ';' not in l:
print('tip not captured by regex:',l.replace('\t',''))

if 'translate' in l.lower(): ## tip encoding starts on next line
tipFlag=True
if ';' in l:
tipFlag=False

assert tree,'Tree not captured by regex'
assert tree.count_terminals()==tipNum,'Not all tips have been parsed.'
Expand Down

0 comments on commit 4dd23c5

Please sign in to comment.