Skip to content

Commit

Permalink
Merge pull request smirarab#28 from kodicollins/master
Browse files Browse the repository at this point in the history
new sub-aligners within PASTA
  • Loading branch information
smirarab authored Apr 6, 2018
2 parents 91cee8a + e0ef70d commit 181d42e
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 9 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ You need to rename these directories and remove the `-master` part.
Finally, those with 32-bit Linux machines need to be aware that the master branch has 64-bit binaries.
32-bit binaries are provided in the `32bit` branch of `sate-tools-linux` git project (so download [this zip file](https://github.com/smirarab/sate-tools-linux/archive/32bit.zip) instead).

*If you want to use MAFFT-Homologs within PASTA*
`cd sate-tools-linux` or `cd sate-tools-mac`
Use `git clone https://github.com/kodicollins/pasta-databases` or download it directly from `https://github.com/kodicollins/pasta-databases.git`
*Be sure to leave this directory `cd ..` before starting the next step*

4. `cd pasta` (or `cd pasta-master` if you used the zip file instead of cloning the git repository)

5. Then run:
Expand All @@ -104,6 +109,14 @@ If you don't have root access, remove the `sudo` part and instead use `--user`
* The `setup.py` script is supposed to install setuptools for you if you don't have it. This sometimes works and sometimes doesn't. If you get an error with a message like ` invalid command 'develop'`, it means that setuptools is not installed. To solve this issue, you can manually install [setup tools](https://pypi.python.org/pypi/setuptools#installation-instructions). For example, on Linux, you can run `curl https://bootstrap.pypa.io/ez_setup.py -o - | sudo python`
(but note there are other ways of installing setuptools as well).

6. PASTA now includes additional aligners for Linux and Mac users: mafft-ginsi, mafft-homologs, contralign (version 1), and probcons. In order to use mafft-homologs and contralign, you must set the environment variable `CONTRALIGN_DIR` to the location of your tools directory, e.g. `CONTRALIGN_DIR=/dir/to/sate-tools-linux`.

A simple step-by-step procedure for this is as follows:
a. change your directory to sate-tools-linux (or sate-tools-mac), type `pwd` in the command line, and copy the output
b. `vim ~/.bashrc`, press i and then type `export CONTRALIGN_DIR=`(paste the copied output/directory) at the bottom of the text file, then press ESC followed by :wq
c. then in the command line type `source ~/.bashrc`

To use these aligners, add `--aligner=NAME_OF_ALIGNER` to your PASTA command line, where NAME_OF_ALIGNER can now also be one of `ginsi`, `homologs`, `contralign`, or `probcons`.

### 3. From Virtual Machine (VM)

Expand Down
20 changes: 16 additions & 4 deletions pasta/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ def __init__(self):
self._categories.append('probalign')
self.probcons = UserSettingGroup('probcons')
self._categories.append('probcons')
self.contralign = UserSettingGroup('contralign')
self._categories.append('contralign')
self.ginsi = UserSettingGroup('ginsi')
self._categories.append('ginsi')
self.homologs = UserSettingGroup('homologs')
self._categories.append('homologs')
self.padaligner = UserSettingGroup('padaligner')
self._categories.append('padaligner')
self.clustalw2 = UserSettingGroup('clustalw2')
Expand Down Expand Up @@ -58,14 +64,20 @@ def __init__(self):
self.fasttree.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to FastTree.', subcategory=None))
self.opal.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to Opal jar file', subcategory=None))
self.opal.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to Opal.', subcategory=None))
self.clustalw2.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to clustalw2 exeutable', subcategory=None))
self.clustalw2.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to clustalw2 executable', subcategory=None))
self.clustalw2.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to Clustalw2.', subcategory=None))
self.muscle.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to muscle exeutable', subcategory=None))
self.muscle.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to muscle executable', subcategory=None))
self.muscle.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to Muscle.', subcategory=None))
self.probalign.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ProbalignAligner exeutable', subcategory=None))
self.probalign.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ProbalignAligner executable', subcategory=None))
self.probalign.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to Probalign.', subcategory=None))
self.probcons.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ProbConsAligner exeutable', subcategory=None))
self.probcons.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ProbConsAligner executable', subcategory=None))
self.probcons.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to ProbCons.', subcategory=None))
# Path/args options for the contralign, ginsi and homologs aligner groups.
# Each help string names the tool its own group configures.
self.contralign.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to contralign executable', subcategory=None))
self.contralign.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to contralign.', subcategory=None))
self.ginsi.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ginsi (mafft-ginsi) executable', subcategory=None))
self.ginsi.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to ginsi.', subcategory=None))
self.homologs.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to homologs (mafft-homologs) executable', subcategory=None))
self.homologs.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to homologs.', subcategory=None))
self.commandline.add_option('exportconfig', StringUserSetting(name='exportconfig', default=None, short_name=None, help='Export the configuration to the specified file and exit. This is useful if you want to combine several configurations and command line settings into a single configuration file to be used in other analyses.', subcategory=None))
self.commandline.add_option('input', StringUserSetting(name='input', default=None, short_name='i', help='input sequence file', subcategory=None))
self.commandline.add_option('treefile', StringUserSetting(name='treefile', default=None, short_name='t', help='starting tree file', subcategory=None))
Expand Down
108 changes: 106 additions & 2 deletions pasta/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,79 @@ def create_job(self, alignment, guide_tree=None, **kwargs):
delete_temps=kwargs.get('delete_temps', self.delete_temps),
stdout=alignedfn)

class GinsiAligner(Aligner):
    """Aligner wrapper for the ``ginsi`` tool.

    The executable is invoked with MAFFT's ``--globalpair --maxiterate 1000``
    options, and the alignment is collected from the tool's standard output.
    """
    section_name = 'ginsi aligner'
    #url = 'http://align.bmr.kyushu-u.ac.jp/mafft/software'
    # NOTE(review): the homologs and contralign aligner classes in this file
    # declare `is_bundled_tool` instead of `is_bundled` -- confirm which
    # attribute name the rest of the code actually reads.
    is_bundled = False

    def __init__(self, temp_fs, **kwargs):
        """Register this tool under the name 'ginsi' with the base Aligner."""
        Aligner.__init__(self, 'ginsi', temp_fs, **kwargs)

    def create_job(self, alignment, guide_tree=None, **kwargs):
        """Create the job that aligns the sequences of `alignment`.

        Parameters:
            alignment: the input alignment; it is stripped of gaps via
                ``alignment.unaligned()`` before being handed to the tool.
            guide_tree: accepted for interface compatibility; not used here.
            **kwargs: forwarded to ``_prepare_input``. The keys read directly
                are ``context_str`` (prefix of the job id, default ''),
                ``num_cpus`` (value for ``--thread``, default 1) and
                ``delete_temps`` (default ``self.delete_temps``).

        Returns:
            A ``FakeJob`` when the input has no taxa or fewer than two taxa
            after unaligning (nothing to align); otherwise whatever
            ``_finish_standard_job`` returns.
        """
        job_id = kwargs.get('context_str', '') + '_ginsi'
        if alignment.get_num_taxa() == 0:
            return FakeJob(alignment, context_str=job_id)
        new_alignment = alignment.unaligned()
        if new_alignment.get_num_taxa() < 2:
            return FakeJob(new_alignment, context_str=job_id)
        scratch_dir, seqfn, alignedfn = self._prepare_input(new_alignment, **kwargs)

        # The executable is invoked the same way on every platform, so no
        # platform check is needed.
        invoc = [self.exe, '--globalpair', '--maxiterate', '1000']
        # Only supply the default --ep value when the user has not set one.
        if '--ep' not in self.user_opts:
            invoc.extend(['--ep', '0.123'])
        invoc.append('--quiet')
        invoc.extend(self.user_opts)
        invoc.extend(['--thread', str(kwargs.get('num_cpus', 1))])
        invoc.append(seqfn)

        # The alignment is written to standard output, so stdout is
        # redirected to the aligned-output file and read back from there.
        return self._finish_standard_job(alignedfn=alignedfn,
                                         datatype=alignment.datatype,
                                         invoc=invoc,
                                         scratch_dir=scratch_dir,
                                         job_id=job_id,
                                         delete_temps=kwargs.get('delete_temps', self.delete_temps),
                                         stdout=alignedfn)

class HomologsAligner(Aligner):
    """Aligner wrapper for the ``homologs`` tool.

    The tool is run with the ``-l`` flag on the input sequence file and its
    standard output is captured as the resulting alignment.
    """
    section_name = 'homologs aligner'
    #url = 'http://align.bmr.kyushu-u.ac.jp/mafft/software'
    is_bundled_tool = False

    def __init__(self, temp_fs, **kwargs):
        """Register this tool under the name 'homologs' with the base Aligner."""
        Aligner.__init__(self, 'homologs', temp_fs, **kwargs)

    def create_job(self, alignment, guide_tree=None, **kwargs):
        """Create the job that aligns the sequences of `alignment`.

        `guide_tree` is accepted but not used. From `kwargs` this method
        reads ``context_str`` (job id prefix), ``num_cpus`` (passed as
        ``--thread``, default 1) and ``delete_temps``; all kwargs are also
        forwarded to ``_prepare_input``.

        Returns a ``FakeJob`` when there are no taxa, or fewer than two taxa
        after unaligning; otherwise the result of ``_finish_standard_job``.
        """
        context = kwargs.get('context_str', '')
        job_id = context + '_homologs'

        # Nothing to align: hand the input straight back.
        if alignment.get_num_taxa() == 0:
            return FakeJob(alignment, context_str=job_id)
        unaligned = alignment.unaligned()
        if unaligned.get_num_taxa() < 2:
            return FakeJob(unaligned, context_str=job_id)

        scratch_dir, seqfn, alignedfn = self._prepare_input(unaligned, **kwargs)

        thread_count = str(kwargs.get('num_cpus', 1))
        invoc = [self.exe, '-l', seqfn, '--thread', thread_count]
        invoc += list(self.user_opts)

        # The tool prints the alignment on standard output, so stdout is
        # redirected to the aligned-output file.
        keep_or_delete = kwargs.get('delete_temps', self.delete_temps)
        return self._finish_standard_job(
            alignedfn=alignedfn,
            datatype=alignment.datatype,
            invoc=invoc,
            scratch_dir=scratch_dir,
            job_id=job_id,
            delete_temps=keep_or_delete,
            stdout=alignedfn,
        )


class OpalAligner(Aligner):
section_name = 'opal aligner'
Expand Down Expand Up @@ -318,6 +391,37 @@ def create_job(self, alignment, guide_tree=None, **kwargs):
job_id=job_id,
delete_temps=kwargs.get('delete_temps', self.delete_temps))

class ContralignAligner(Aligner):
    """Aligner wrapper for the ``contralign`` tool.

    The tool is run on the input sequence file followed by any user
    options, and its standard output is captured as the alignment.
    """
    section_name = 'contralign aligner'
    url = 'http://contra.stanford.edu/contralign/'
    is_bundled_tool = False

    def __init__(self, temp_fs, **kwargs):
        """Register this tool under the name 'contralign' with the base Aligner."""
        Aligner.__init__(self, 'contralign', temp_fs, **kwargs)

    def create_job(self, alignment, guide_tree=None, **kwargs):
        """Create the job that aligns the sequences of `alignment`.

        `guide_tree` is accepted but not used. From `kwargs` this method
        reads ``context_str`` (job id prefix) and ``delete_temps``; all
        kwargs are also forwarded to ``_prepare_input``.

        Returns a ``FakeJob`` when there are no taxa, or fewer than two taxa
        after unaligning; otherwise the result of ``_finish_standard_job``.
        """
        context = kwargs.get('context_str', '')
        job_id = context + '_contralign'

        # Nothing to align: hand the input straight back.
        if alignment.get_num_taxa() == 0:
            return FakeJob(alignment, context_str=job_id)
        unaligned = alignment.unaligned()
        if unaligned.get_num_taxa() < 2:
            return FakeJob(unaligned, context_str=job_id)

        scratch_dir, seqfn, alignedfn = self._prepare_input(unaligned, **kwargs)

        invoc = [self.exe, seqfn] + list(self.user_opts)

        # The tool prints the alignment on standard output, so stdout is
        # redirected to the aligned-output file.
        keep_or_delete = kwargs.get('delete_temps', self.delete_temps)
        return self._finish_standard_job(
            alignedfn=alignedfn,
            datatype=alignment.datatype,
            invoc=invoc,
            scratch_dir=scratch_dir,
            job_id=job_id,
            delete_temps=keep_or_delete,
            stdout=alignedfn,
        )

class MuscleAligner(Aligner):
section_name = 'muscle aligner'
url = 'http://www.drive5.com/muscle'
Expand Down Expand Up @@ -970,11 +1074,11 @@ def create_job(self, backbone, query_fn, **kwargs):
delete_temps=kwargs.get('delete_temps', self.delete_temps))

# Registries of the tool wrapper classes exposed by this module. Debug mode
# includes the stand-in tools (PadAligner, FakeAligner, Randtree,
# FakeTreeEstimator); normal mode includes the Custom* merger instead.
# NOTE(review): MuscleAligner is listed only in the non-debug tuple --
# confirm that this is intentional.
if GLOBAL_DEBUG:
    AlignerClasses = (GinsiAligner, HomologsAligner, ContralignAligner, ProbalignAligner, Clustalw2Aligner, MafftAligner, PrankAligner, OpalAligner, PadAligner, FakeAligner, CustomAligner, HMMERAlignAligner, ProbconsAligner)
    MergerClasses = (MuscleMerger, OpalMerger)
    TreeEstimatorClasses = (FastTree, Randtree, Raxml, FakeTreeEstimator, CustomTreeEstimator)
else:
    AlignerClasses = (GinsiAligner, HomologsAligner, ContralignAligner, ProbconsAligner, ProbalignAligner, Clustalw2Aligner, MafftAligner, PrankAligner, OpalAligner, MuscleAligner, CustomAligner, HMMERAlignAligner)
    MergerClasses = (MuscleMerger, OpalMerger, CustomMerger)
    TreeEstimatorClasses = (Raxml, FastTree, CustomTreeEstimator)

Expand Down
17 changes: 14 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import platform
import sys
import pasta

import tarfile
script_name = 'run_pasta.py'
gui_script_name = 'run_pasta_gui.py'

Expand Down Expand Up @@ -187,13 +187,24 @@ def create_symlink(src_path, subdir=None):
# Symlink every regular, non-.txt file found in each tools directory
# (an empty subdir entry means the tools source directory itself).
for subdir in tools_bin_subdirs:
    tdir = os.path.join(tools_bin_srcdir, subdir) if subdir else tools_bin_srcdir
    for fpath in os.listdir(tdir):
        src_path = os.path.join(tdir, fpath)
        if not os.path.isfile(src_path):
            continue
        if src_path.endswith('.txt'):
            continue
        create_symlink(src_path, subdir)


# The 'databases' directory of the tools bundle holds the swissprot* files
# used by mafft-homologs. They are stored compressed to keep the git
# repository small, so they have to be extracted before they can be used.
# The directory is optional (it is only needed for mafft-homologs), so its
# absence is not an error.
searchDir = os.path.join(tools_bin_srcdir, 'databases')
if os.path.isdir(searchDir):
    for files in os.listdir(searchDir):
        fullPath = os.path.join(searchDir, files)
        if fullPath.endswith("tar.gz"):
            tar = tarfile.open(fullPath, "r:gz")
            try:
                tar.extractall(searchDir)
            finally:
                # Always release the archive handle, even if extraction fails.
                tar.close()

# Expose the mafft directory under the name 'ginsi' as well.
mafftDir = os.path.join(tools_bin_srcdir, 'mafft')
ginsiDir = os.path.join(DEST_DIR_ROOT, 'ginsi')
# Re-running setup must not fail because the link already exists: refresh an
# existing symlink, and leave any non-link entry at that path untouched.
if os.path.islink(ginsiDir):
    os.remove(ginsiDir)
if not os.path.lexists(ginsiDir):
    os.symlink(mafftDir, ginsiDir)

setup(**param)

0 comments on commit 181d42e

Please sign in to comment.