Skip to content

Commit

Permalink
add foundation taxonomy option; flake8
Browse files Browse the repository at this point in the history
* add foundation taxonomy option. This is needed if you are supplying a TREE foundation!
  • Loading branch information
JTFouquier committed Apr 18, 2018
1 parent d9d6f08 commit ae4fa28
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 33 deletions.
2 changes: 1 addition & 1 deletion ghosttree/extensions/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,5 @@ def preprocess_extension_tree_sequences(extension_sequences_fp,
"""
similarity_threshold = str(similarity_threshold)
os.system("sumaclust -g -f -t "+similarity_threshold+" -O " +
os.system("sumaclust -g -f -t " + similarity_threshold + " -O " +
"" + otu_formatted_fp + " " + extension_sequences_fp + "")
51 changes: 25 additions & 26 deletions ghosttree/scaffold/hybridtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def extensions_onto_foundation(otu_file_fh, extension_taxonomy_fh,
extension_seq_fh,
foundation_fh,
ghost_tree_fp,
graft_level):
graft_level, foundation_taxonomy):
"""Combines two genetic databases into one phylogenetic tree.
Some genetic databases provide finer taxonomic resolution,
Expand Down Expand Up @@ -103,15 +103,16 @@ def extensions_onto_foundation(otu_file_fh, extension_taxonomy_fh,
foundation_tree = \
_make_nr_foundation_newick(foundation_fh,
extension_genus_accession_list_dic,
graft_letter)
graft_letter, foundation_taxonomy)

if sniffer_results == 'fasta':
nr_foundation_alignment = \
_make_nr_foundation_alignment(foundation_fh,
extension_genus_accession_list_dic,
graft_letter)
skbio.io.write(nr_foundation_alignment,
into=ghost_tree_fp + "/nr_foundation_alignment_gt.fasta",
into=ghost_tree_fp +
"/nr_foundation_alignment_gt.fasta",
format="fasta")
foundation_tree, all_std_error = \
_make_foundation_tree(ghost_tree_fp +
Expand All @@ -138,8 +139,8 @@ def extensions_onto_foundation(otu_file_fh, extension_taxonomy_fh,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
std_output, std_error = process.communicate()
all_std_error += "FastTree warnings for genus "+key_node+" are:\n" \
+ str(std_error) + "\n"
all_std_error += "FastTree warnings for genus " + key_node + \
" are:\n" + str(std_error) + "\n"
mini_tree = skbio.io.read("tmp/mini_tree_gt.nwk", format='newick',
into=skbio.TreeNode)
node.extend(mini_tree.root_at_midpoint().children[:])
Expand All @@ -159,7 +160,7 @@ def _make_accession_id_file(ghost_tree_fp):
output = open(ghost_tree_fp + "/ghost_tree_extension_accession_ids.txt",
"w")
for node in ghosttree.tips():
output.write(str(node.name)+"\n")
output.write(str(node.name) + "\n")
output.close()


Expand All @@ -170,7 +171,7 @@ def _make_mini_otu_files(key_node, extension_genus_accession_list_dic, seqs):

for seq in seqs:
if seq.metadata['id'] in keep:
fasta_format = ">"+seq.metadata['id']+"\n"+str(seq)+"\n"
fasta_format = ">" + seq.metadata['id'] + "\n" + str(seq) + "\n"
output_file.write(fasta_format)
output_file.close()

Expand Down Expand Up @@ -229,33 +230,31 @@ def _create_taxonomy_dict(extension_taxonomy_fh, graft_level):

def _make_nr_foundation_newick(foundation_fh,
                               extension_genus_accession_list_dic,
                               graft_letter, foundation_taxonomy):
    """Shear a newick foundation tree down to one tip per graft taxon.

    Each line of the foundation taxonomy is scanned for the graft taxa
    (the keys of ``extension_genus_accession_list_dic``). The first
    accession whose taxonomy string mentions a still-unmatched taxon is
    kept as that taxon's representative; the foundation tree is then
    sheared to the kept accessions.

    Parameters
    ----------
    foundation_fh
        Newick foundation tree, passed straight to ``skbio.io.read``.
    extension_genus_accession_list_dic : dict
        Only its keys (graft taxon names) are used here.
    graft_letter : str
        Rank prefix letter; a taxon is matched as ``<letter>__<taxon>;``.
    foundation_taxonomy : iterable of str
        Open taxonomy file handle (or any iterable of lines). Each line
        is tab-separated: accession first, taxonomy string second.

    Returns
    -------
    The result of ``foundation_tree.shear(...)`` on the kept accessions.

    Raises
    ------
    ValueError
        If ``foundation_taxonomy`` is None (no taxonomy was supplied).

    Notes
    -----
    Rebinds the module-level ``foundation_accession_genus_dic`` to a new
    dict mapping each kept accession to the taxon it represents.
    """
    global foundation_accession_genus_dic
    foundation_accession_genus_dic = {}

    if foundation_taxonomy is None:
        # Fail with an actionable message instead of "NoneType is not
        # iterable" further down.
        raise ValueError("A foundation taxonomy file is required when the "
                         "foundation is supplied as a tree.")

    foundation_tree = skbio.io.read(foundation_fh, format='newick',
                                    into=skbio.TreeNode,
                                    convert_underscores=False)

    # The taxonomy string is lower-cased before matching, so the rank
    # prefix must be lower-case too or it could never match.
    rank_prefix = graft_letter.lower() + "__"
    unmatched_taxa = list(extension_genus_accession_list_dic.keys())
    foundation_unique_accessions = []

    for line in foundation_taxonomy:
        if not unmatched_taxa:
            # Every graft taxon already has a representative.
            break
        fields = line.split('\t')
        if len(fields) < 2:
            # Blank or malformed line: no taxonomy column to inspect.
            continue
        accession = fields[0].strip()
        lineage = fields[1].strip().lower()

        for graft_taxa in unmatched_taxa:
            taxon = graft_taxa.lower()
            # Plain substring tests: taxon names are literals, not regex
            # patterns. ";taxon" also covers the ";taxon;" form.
            if (";" + taxon) in lineage or \
                    (rank_prefix + taxon + ";") in lineage:
                unmatched_taxa.remove(graft_taxa)
                foundation_accession_genus_dic[accession] = graft_taxa
                foundation_unique_accessions.append(accession)
                # Stop here: the list was just mutated, and an accession
                # should represent exactly one taxon.
                break

    return foundation_tree.shear(foundation_unique_accessions)
Expand Down
16 changes: 11 additions & 5 deletions scripts/ghost-tree
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,17 @@ def scaffold():
default='g',
help="Taxonomic level to graft extensions to foundation; this "
"option is experimental; default is g (genus)")
@click.option('-i', '--foundation-taxonomy', type=click.File('U'))
@click.option("--stderr", is_flag=True, help="'saves error log file")
@click.option("--foundation_alignment", is_flag=True, help="saves non-redundant foundation alignment file")
@click.option("--foundation_tree", is_flag=True, help="saves foundation phylogenetic tree file")
@click.option("--exclude_id_list", is_flag=True, help="will not save accession id file")
def hybrid_tree(graft_level, stderr, foundation_alignment, foundation_tree, exclude_id_list,
@click.option("--foundation-alignment", is_flag=True,
help="saves non-redundant foundation alignment file")
@click.option("--foundation-tree", is_flag=True,
help="saves foundation phylogenetic tree file")
@click.option("--exclude-id-list", is_flag=True,
help="will not save accession id file")
def hybrid_tree(graft_level, foundation_taxonomy, stderr, foundation_alignment,
foundation_tree,
exclude_id_list,
extension_trees_otu_map, extension_trees_taxonomy_file,
extension_trees_sequence_file, foundation_file,
ghost_tree_output_folder):
Expand Down Expand Up @@ -248,7 +254,7 @@ def hybrid_tree(graft_level, stderr, foundation_alignment, foundation_tree, excl
extension_trees_sequence_file,
foundation_file,
ghost_tree_output_folder,
graft_level)
graft_level, foundation_taxonomy)

if stderr:
logfile = open(ghost_tree_output_folder + "/ghost_tree_log.txt", "w")
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[flake8]
filename = *
filename = *.py
exclude = *.pyc, __pycache__
max-line-length = 120
ignore=E722

0 comments on commit ae4fa28

Please sign in to comment.