Skip to content

Commit

Permalink
docs/help; flake8; check for foundation taxonomy
Browse files Browse the repository at this point in the history
If foundation tree is provided, check for foundation taxonomy. If not there, exit.

Lots of flake8 line length issues (pep8)
  • Loading branch information
JTFouquier committed Apr 19, 2018
1 parent 95c51a1 commit d22899e
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 47 deletions.
6 changes: 4 additions & 2 deletions ghosttree/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ def filter_positions(alignment_fh, maximum_gap_frequency,
def _filter_gap_positions(aln, maximum_gap_frequency):

aln_gap_frequencies = (aln.gap_frequencies(axis='sequence',
relative=False) / aln._seqs.count())
aln_gap_frequencies_boolean = (aln_gap_frequencies <= maximum_gap_frequency)
relative=False) /
aln._seqs.count())
aln_gap_frequencies_boolean = (aln_gap_frequencies <=
maximum_gap_frequency)
aln = aln.iloc[:, aln_gap_frequencies_boolean]

return aln
Expand Down
28 changes: 19 additions & 9 deletions ghosttree/scaffold/hybridtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
import shutil
import subprocess
import sys

import skbio
import pandas as pd
Expand Down Expand Up @@ -57,10 +58,16 @@ def extensions_onto_foundation(otu_file_fh, extension_taxonomy_fh,
accession numbers are the same as in the otu_file_fh and
extension_taxonomy_fh.
foundation_alignment_fh : filehandle
File containing pre-aligned sequences from a genetic marker database
in .fasta format. This file refers to the "foundation" of the
ghost-tree. Contains accession numbers and taxonomy labels.
foundation_fh : filehandle
File containing EITHER pre-aligned sequences from a genetic marker
database in .fasta format OR a newick tree. This file refers to the
"foundation" of the ghost-tree.
.fasta contains accession numbers *and* taxonomy labels.
.nwk tree is a tree with accession numbers. MUST supply a foundation
taxonomy if using a tree as a foundation. Pass this via the
--foundation-taxonomy option. See --help for details.
ghost_tree_fp : folder
Output folder contains files including:
Expand Down Expand Up @@ -100,6 +107,10 @@ def extensions_onto_foundation(otu_file_fh, extension_taxonomy_fh,
sniffer_results = skbio.io.sniff(foundation_fh)[0]

if sniffer_results == 'newick':
if foundation_taxonomy is None:
sys.exit("ghost-tree error: You must provide a foundation "
"taxonomy if using a foundation tree. Pass the taxonomy "
"file using the '--foundation-taxonomy' flag.")
foundation_tree = \
_make_nr_foundation_newick(foundation_fh,
extension_genus_accession_list_dic,
Expand Down Expand Up @@ -144,8 +155,8 @@ def extensions_onto_foundation(otu_file_fh, extension_taxonomy_fh,
mini_tree = skbio.io.read("tmp/mini_tree_gt.nwk", format='newick',
into=skbio.TreeNode)
node.extend(mini_tree.root_at_midpoint().children[:])
print('GRAFT LEVEL: ', graft_letter)
print(foundation_tree.ascii_art())
# print('GRAFT LEVEL: ', graft_letter)
# print(foundation_tree.ascii_art())
shutil.rmtree("tmp")
ghost_tree_nwk = open(ghost_tree_fp + "/ghost_tree.nwk", "w")
ghost_tree_nwk.write(str(foundation_tree))
Expand Down Expand Up @@ -246,8 +257,8 @@ def _make_nr_foundation_newick(foundation_fh,
for graft_taxa in all_genus_list:
if_case = (re.search(";" + graft_taxa.lower() + ";",
foundation_taxonomy.lower()) or
re.search(graft_letter + "__" + graft_taxa.lower() + ";",
foundation_taxonomy.lower()) or
re.search(graft_letter + "__" + graft_taxa.lower() +
";", foundation_taxonomy.lower()) or
re.search(";" + graft_taxa.lower(),
foundation_taxonomy.lower()))

Expand Down Expand Up @@ -335,7 +346,6 @@ def _collapse(tax, level):


def _graft_functions(graft_level):
print(graft_level)
graft_letter = graft_level
graft_level_map = {'p': 2, 'c': 3, 'o': 4, 'f': 5, 'g': 6}
graft_level = graft_level_map[graft_letter]
Expand Down
6 changes: 3 additions & 3 deletions ghosttree/scaffold/tests/test_hybridtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def test_newick_file_few_extensions(self):
self.graft_letter_g,
self.foundation_taxonomy)
result = str(result)
trimmed_tree = '(CBB3:0.44621,PBB1:0.08710999999999999)0.801:0.14776;\n'
trimmed_tree = '(CBB3:0.44621,PBB1:0.08710999999999999)' \
'0.801:0.14776;\n'
self.assertEqual(result, trimmed_tree)

def test_graft_functions(self):
Expand All @@ -82,8 +83,7 @@ def test_create_taxonomy_dic_many_genus(self):

def test_create_taxonomy_dic_none(self):
with self.assertRaises(ValueError):
list(_create_taxonomy_dict(self.extension_taxonomy_none,
9))
list(_create_taxonomy_dict(self.extension_taxonomy_none, 9))

def test_make_mini_otu_files(self):
os.system("mkdir tmp")
Expand Down
3 changes: 2 additions & 1 deletion ghosttree/silva/tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def test_fasta_with_fungi(self):
self.accession,
self.taxonomy_with_fungi)
self.assertEqual(list(result),
[Sequence("ATCG", metadata={'id': "AB21", 'description': "Fungi"})])
[Sequence("ATCG", metadata={'id': "AB21",
'description': "Fungi"})])

def test_fasta_with_many_fungi(self):
result = fungi_from_fasta(self.fasta_many_fungi,
Expand Down
75 changes: 44 additions & 31 deletions scripts/ghost-tree
Original file line number Diff line number Diff line change
Expand Up @@ -177,22 +177,26 @@ def scaffold():
@click.argument("ghost-tree-output-folder", type=click.STRING)
@click.option('--graft-level', type=click.Choice(['p', 'c', 'o', 'f', 'g']),
default='g',
help="Taxonomic level to graft extensions to foundation; this "
"option is experimental; default is g (genus)")
@click.option('-i', '--foundation-taxonomy', type=click.File('U'))
@click.option("--stderr", is_flag=True, help="'saves error log file")
@click.option("--foundation-alignment", is_flag=True,
help="saves non-redundant foundation alignment file")
@click.option("--foundation-tree", is_flag=True,
help="saves foundation phylogenetic tree file")
help="Taxonomic level to graft extensions to foundation. "
"Default is g (genus). p=phylum, c=class, o=order, "
"f=family.")
@click.option('-i', '--foundation-taxonomy', type=click.File('U'),
help="If you use newick tree for your foundation, this is "
"REQUIRED. This is a tab-delimited accession ID and "
"taxonomy line file.")
@click.option("--stderr", is_flag=True, help="Saves error log file "
"(optional)")
@click.option("--save-foundation-alignment", is_flag=True,
help="Saves non-redundant foundation alignment file (optional)")
@click.option("--save-foundation-tree", is_flag=True,
help="Saves foundation phylogenetic tree file (optional)")
@click.option("--exclude-id-list", is_flag=True,
help="will not save accession id file")
def hybrid_tree(graft_level, foundation_taxonomy, stderr, foundation_alignment,
foundation_tree,
exclude_id_list,
extension_trees_otu_map, extension_trees_taxonomy_file,
extension_trees_sequence_file, foundation_file,
ghost_tree_output_folder):
help="Will not save accession id file (optional)")
def hybrid_tree(graft_level, foundation_taxonomy, stderr,
save_foundation_alignment, save_foundation_tree,
exclude_id_list, extension_trees_otu_map,
extension_trees_taxonomy_file, extension_trees_sequence_file,
foundation_file, ghost_tree_output_folder):
"""Combines two genetic databases into one phylogenetic tree and is the
final command in ghost-tree.
Expand All @@ -205,7 +209,7 @@ def hybrid_tree(graft_level, foundation_taxonomy, stderr, foundation_alignment,
taxonomic resolution.
The output file is one phylogenetic tree in .nwk format, which has the
benefits of both databases, but allows sequencing to be performed using
benefits of both databases. This allows sequencing to be performed using
only one primer set, the "extension trees" primer set.
Arguments:
Expand All @@ -218,7 +222,7 @@ def hybrid_tree(graft_level, foundation_taxonomy, stderr, foundation_alignment,
accession numbers, which are part of that OTU cluster (as in output of
"ghost-tree group-extensions"). This file refers to the "extension
trees". File references to sequence reads or sample numbers/names are
not valid here. This is not an OTU .biom table.
not valid here. This is NOT the same as an OTU .biom table.
2) extension-trees-taxonomy-file:
Tab-delimited text file related to "extension trees" with the 1st
Expand All @@ -234,20 +238,29 @@ def hybrid_tree(graft_level, foundation_taxonomy, stderr, foundation_alignment,
accession numbers are the same as in the extension-trees-otu-map and
extension-trees-taxonomy-file.
4) foundation-alignment-file:
File containing pre-aligned sequences from a genetic marker database
in .fasta format. This file refers to the "foundation" of the
ghost-tree. Contains accession numbers and taxonomy labels.
4) foundation-file:
File containing EITHER pre-aligned sequences from a genetic marker
database in .fasta format OR a newick tree. This file refers to the
"foundation" of the ghost-tree.
.fasta contains accession numbers *and* taxonomy labels.
.nwk tree is a tree with accession numbers. MUST supply a foundation
taxonomy if using a tree as a foundation. Pass this via the
--foundation-taxonomy option. See --help for details.
5) ghost-tree-output-folder:
Output folder contains files including:
a) The Newick formatted ghost-tree, which is the final output of the
ghost-tree tool. This is a phylogenetic tree designed for
downstream diversity analyses.
b) Accession IDs from the ghost-tree.nwk file that you can use for
downstream analyses tools
c) log error file (this is an optional file that you can have if you
type '--stderr')
Output folder contains files including:
a) The Newick formatted ghost-tree, which is the final output of the
ghost-tree tool. This is a phylogenetic tree designed for
downstream diversity analyses.
b) Accession IDs from the ghost-tree.nwk file that you can use for
downstream analyses tools
c) log error file (this is an optional file that you can have if you
type '--stderr')
"""
_, std_error = extensions_onto_foundation(extension_trees_otu_map,
extension_trees_taxonomy_file,
Expand All @@ -261,7 +274,7 @@ def hybrid_tree(graft_level, foundation_taxonomy, stderr, foundation_alignment,
logfile.write(std_error)
logfile.close()

if foundation_alignment:
if save_foundation_alignment:
pass
else:
try:
Expand All @@ -270,7 +283,7 @@ def hybrid_tree(graft_level, foundation_taxonomy, stderr, foundation_alignment,
except FileNotFoundError:
pass

if foundation_tree:
if save_foundation_tree:
pass
else:
try:
Expand Down
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
[flake8]
filename = *.py
exclude = *.pyc, __pycache__
max-line-length = 120
ignore=E722

0 comments on commit d22899e

Please sign in to comment.