From 3d3979799915e2c348656516cf4451ae215a29ce Mon Sep 17 00:00:00 2001 From: Kodi Date: Mon, 14 Aug 2017 19:18:05 -0500 Subject: [PATCH 01/10] new --- pasta/settings.py | 20 +++++++-- pasta/tools.py | 108 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 122 insertions(+), 6 deletions(-) diff --git a/pasta/settings.py b/pasta/settings.py index e9d8a1f..519e2ba 100644 --- a/pasta/settings.py +++ b/pasta/settings.py @@ -27,6 +27,12 @@ def __init__(self): self._categories.append('probalign') self.probcons = UserSettingGroup('probcons') self._categories.append('probcons') + self.contralign = UserSettingGroup('contralign') + self._categories.append('contralign') + self.ginsi = UserSettingGroup('ginsi') + self._categories.append('ginsi') + self.homologs = UserSettingGroup('homologs') + self._categories.append('homologs') self.padaligner = UserSettingGroup('padaligner') self._categories.append('padaligner') self.clustalw2 = UserSettingGroup('clustalw2') @@ -58,14 +64,20 @@ def __init__(self): self.fasttree.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to FastTree.', subcategory=None)) self.opal.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to Opal jar file', subcategory=None)) self.opal.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to Opal.', subcategory=None)) - self.clustalw2.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to clustalw2 exeutable', subcategory=None)) + self.clustalw2.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to clustalw2 executable', subcategory=None)) self.clustalw2.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to Clustalw2.', subcategory=None)) - self.muscle.add_option('path', StringUserSetting(name='path', default='', 
short_name=None, help='Path to muscle exeutable', subcategory=None)) + self.muscle.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to muscle executable', subcategory=None)) self.muscle.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to Muscle.', subcategory=None)) - self.probalign.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ProbalignAligner exeutable', subcategory=None)) + self.probalign.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ProbalignAligner executable', subcategory=None)) self.probalign.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to Probalign.', subcategory=None)) - self.probcons.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ProbConsAligner exeutable', subcategory=None)) + self.probcons.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ProbConsAligner executable', subcategory=None)) self.probcons.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to ProbCons.', subcategory=None)) + self.contralign.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ProbConsAligner executable', subcategory=None)) + self.contralign.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to ProbCons.', subcategory=None)) + self.ginsi.add_option('path', StringUserSetting(name='path', default='', short_name=None, help='Path to ProbConsAligner executable', subcategory=None)) + self.ginsi.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to ProbCons.', subcategory=None)) + self.homologs.add_option('path', 
StringUserSetting(name='path', default='', short_name=None, help='Path to ProbConsAligner executable', subcategory=None)) + self.homologs.add_option('args', StringUserSetting(name='args', default='', short_name=None, help='Arguments to be passed to ProbCons.', subcategory=None)) self.commandline.add_option('exportconfig', StringUserSetting(name='exportconfig', default=None, short_name=None, help='Export the configuration to the specified file and exit. This is useful if you want to combine several configurations and command line settings into a single configuration file to be used in other analyses.', subcategory=None)) self.commandline.add_option('input', StringUserSetting(name='input', default=None, short_name='i', help='input sequence file', subcategory=None)) self.commandline.add_option('treefile', StringUserSetting(name='treefile', default=None, short_name='t', help='starting tree file', subcategory=None)) diff --git a/pasta/tools.py b/pasta/tools.py index 4572891..13079a3 100644 --- a/pasta/tools.py +++ b/pasta/tools.py @@ -257,6 +257,79 @@ def create_job(self, alignment, guide_tree=None, **kwargs): delete_temps=kwargs.get('delete_temps', self.delete_temps), stdout=alignedfn) +class GinsiAligner(Aligner): + section_name = 'ginsi aligner' + #url = 'http://align.bmr.kyushu-u.ac.jp/mafft/software' + is_bundled = False + + def __init__(self, temp_fs, **kwargs): + Aligner.__init__(self, 'ginsi', temp_fs, **kwargs) + + def create_job(self, alignment, guide_tree=None, **kwargs): + job_id = kwargs.get('context_str', '') + '_ginsi' + if alignment.get_num_taxa() == 0: + return FakeJob(alignment, context_str=job_id) + new_alignment = alignment.unaligned() + if new_alignment.get_num_taxa() < 2: + return FakeJob(new_alignment, context_str=job_id) + scratch_dir, seqfn, alignedfn = self._prepare_input(new_alignment, **kwargs) + + invoc = [] + if platform.system() == "Windows": + invoc.append(self.exe) + else: + invoc.extend([self.exe]) + invoc.extend(['--globalpair', 
'--maxiterate', '1000']) + if '--ep' not in self.user_opts: + invoc.extend(['--ep', '0.123']) + invoc.extend(['--quiet']) + invoc.extend(self.user_opts) + invoc.extend(['--thread',str(kwargs.get('num_cpus', 1))]) + invoc.append(seqfn) + + # The MAFFT job creation is slightly different from the other + # aligners because we redirect and read standard output. + + return self._finish_standard_job(alignedfn=alignedfn, + datatype=alignment.datatype, + invoc=invoc, + scratch_dir=scratch_dir, + job_id=job_id, + delete_temps=kwargs.get('delete_temps', self.delete_temps), + stdout=alignedfn) + +class HomologsAligner(Aligner): + section_name = 'homologs aligner' + #url = 'http://align.bmr.kyushu-u.ac.jp/mafft/software' + is_bundled_tool = False + + def __init__(self, temp_fs, **kwargs): + Aligner.__init__(self, 'homologs', temp_fs, **kwargs) + + def create_job(self, alignment, guide_tree=None, **kwargs): + job_id = kwargs.get('context_str', '') + '_homologs' + if alignment.get_num_taxa() == 0: + return FakeJob(alignment, context_str=job_id) + new_alignment = alignment.unaligned() + if new_alignment.get_num_taxa() < 2: + return FakeJob(new_alignment, context_str=job_id) + scratch_dir, seqfn, alignedfn = self._prepare_input(new_alignment, **kwargs) + + invoc = [self.exe, '-l', seqfn] + invoc.extend(['--thread', str(kwargs.get('num_cpus', 1))]) + invoc.extend(self.user_opts) + + # The probcons job creation is slightly different from the other + # aligners because we redirect and read standard output. 
+ + return self._finish_standard_job(alignedfn=alignedfn, + datatype=alignment.datatype, + invoc=invoc, + scratch_dir=scratch_dir, + job_id=job_id, + delete_temps=kwargs.get('delete_temps', self.delete_temps), + stdout=alignedfn) + class OpalAligner(Aligner): section_name = 'opal aligner' @@ -318,6 +391,37 @@ def create_job(self, alignment, guide_tree=None, **kwargs): job_id=job_id, delete_temps=kwargs.get('delete_temps', self.delete_temps)) +class ContralignAligner(Aligner): + section_name = 'contralign aligner' + url = 'http://contra.stanford.edu/contralign/' + is_bundled_tool = False + + def __init__(self, temp_fs, **kwargs): + Aligner.__init__(self, 'contralign', temp_fs, **kwargs) + + def create_job(self, alignment, guide_tree=None, **kwargs): + job_id = kwargs.get('context_str', '') + '_contralign' + if alignment.get_num_taxa() == 0: + return FakeJob(alignment, context_str=job_id) + new_alignment = alignment.unaligned() + if new_alignment.get_num_taxa() < 2: + return FakeJob(new_alignment, context_str=job_id) + scratch_dir, seqfn, alignedfn = self._prepare_input(new_alignment, **kwargs) + + invoc = [self.exe, seqfn] + invoc.extend(self.user_opts) + + # The probcons job creation is slightly different from the other + # aligners because we redirect and read standard output. 
+ + return self._finish_standard_job(alignedfn=alignedfn, + datatype=alignment.datatype, + invoc=invoc, + scratch_dir=scratch_dir, + job_id=job_id, + delete_temps=kwargs.get('delete_temps', self.delete_temps), + stdout=alignedfn) + class MuscleAligner(Aligner): section_name = 'muscle aligner' url = 'http://www.drive5.com/muscle' @@ -970,11 +1074,11 @@ def create_job(self, backbone, query_fn, **kwargs): delete_temps=kwargs.get('delete_temps', self.delete_temps)) if GLOBAL_DEBUG: - AlignerClasses = (ProbalignAligner, Clustalw2Aligner, MafftAligner, PrankAligner, OpalAligner, PadAligner, FakeAligner, CustomAligner, HMMERAlignAligner, ProbconsAligner) + AlignerClasses = (GinsiAligner, HomologsAligner, ContralignAligner, ProbalignAligner, Clustalw2Aligner, MafftAligner, PrankAligner, OpalAligner, PadAligner, FakeAligner, CustomAligner, HMMERAlignAligner, ProbconsAligner) MergerClasses = (MuscleMerger, OpalMerger) TreeEstimatorClasses = (FastTree, Randtree, Raxml, FakeTreeEstimator, CustomTreeEstimator) else: - AlignerClasses = (ProbalignAligner, Clustalw2Aligner, MafftAligner, PrankAligner, OpalAligner, MuscleAligner, CustomAligner, HMMERAlignAligner) + AlignerClasses = (GinsiAligner, HomologsAligner, ContralignAligner, ProbconsAligner, ProbalignAligner, Clustalw2Aligner, MafftAligner, PrankAligner, OpalAligner, MuscleAligner, CustomAligner, HMMERAlignAligner) MergerClasses = (MuscleMerger, OpalMerger, CustomMerger) TreeEstimatorClasses = (Raxml, FastTree, CustomTreeEstimator) From 8005196a92d525d16ae50da8dfec393ee7839629 Mon Sep 17 00:00:00 2001 From: Kodi Date: Mon, 14 Aug 2017 20:18:04 -0500 Subject: [PATCH 02/10] fix setup.py --- setup.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f48c466..b27fa81 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ import platform import sys import pasta - +import tarfile script_name = 'run_pasta.py' gui_script_name = 'run_pasta_gui.py' @@ -193,7 +193,14 @@ def 
create_symlink(src_path, subdir=None): src_path = os.path.join(tdir, fpath) if os.path.isfile(src_path) and not src_path.endswith('.txt'): create_symlink(src_path, subdir) - + #databases in sate-tools-linux holds the swissprot* files for mafft-homologs. They compressed to appease git so we have to extract them to use them. + searchDir = os.path.join(tools_bin_srcdir, 'databases') + for files in os.listdir(searchDir): + fullPath = os.path.join(searchDir, files) + if tar.endswith("tar.gz"): + tar = tarfiles.open(files, r:gz) + tar.extractall(searchDir) + tar.close() setup(**param) From 8373662b4e34ba7b61a3f7cd468802e8d0bd5e81 Mon Sep 17 00:00:00 2001 From: Kodi Date: Mon, 14 Aug 2017 20:25:08 -0500 Subject: [PATCH 03/10] fix setup.py --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b27fa81..c25e08f 100644 --- a/setup.py +++ b/setup.py @@ -198,8 +198,8 @@ def create_symlink(src_path, subdir=None): searchDir = os.path.join(tools_bin_srcdir, 'databases') for files in os.listdir(searchDir): fullPath = os.path.join(searchDir, files) - if tar.endswith("tar.gz"): - tar = tarfiles.open(files, r:gz) + if fullPath.endswith("tar.gz"): + tar = tarfile.open(fullPath, "r:gz") tar.extractall(searchDir) tar.close() From 457e2809a05aef9fb5d093b608f4361beb54d2ee Mon Sep 17 00:00:00 2001 From: Kodi Date: Mon, 14 Aug 2017 20:59:44 -0500 Subject: [PATCH 04/10] ginsi --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c25e08f..7d8a3d3 100644 --- a/setup.py +++ b/setup.py @@ -193,7 +193,8 @@ def create_symlink(src_path, subdir=None): src_path = os.path.join(tdir, fpath) if os.path.isfile(src_path) and not src_path.endswith('.txt'): create_symlink(src_path, subdir) - + if subdir.is('mafft'): + create_symlink(src_path, 'ginsi') #databases in sate-tools-linux holds the swissprot* files for mafft-homologs. They compressed to appease git so we have to extract them to use them. 
searchDir = os.path.join(tools_bin_srcdir, 'databases') for files in os.listdir(searchDir): From 484532fe9002c568d1ce2ae04caa463726e53d7f Mon Sep 17 00:00:00 2001 From: Kodi Date: Mon, 14 Aug 2017 21:10:09 -0500 Subject: [PATCH 05/10] fix ginsi --- setup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 7d8a3d3..2dbce00 100644 --- a/setup.py +++ b/setup.py @@ -193,8 +193,6 @@ def create_symlink(src_path, subdir=None): src_path = os.path.join(tdir, fpath) if os.path.isfile(src_path) and not src_path.endswith('.txt'): create_symlink(src_path, subdir) - if subdir.is('mafft'): - create_symlink(src_path, 'ginsi') #databases in sate-tools-linux holds the swissprot* files for mafft-homologs. They compressed to appease git so we have to extract them to use them. searchDir = os.path.join(tools_bin_srcdir, 'databases') for files in os.listdir(searchDir): @@ -203,5 +201,6 @@ def create_symlink(src_path, subdir=None): tar = tarfile.open(fullPath, "r:gz") tar.extractall(searchDir) tar.close() - + ginsiDir = os.path.join(tools_bin_srcdir, 'mafft') + create_symlink(ginsiDir, "ginsi") setup(**param) From e5de5fef19417d33ff8afdf79a98069b574c7d9a Mon Sep 17 00:00:00 2001 From: Kodi Date: Mon, 14 Aug 2017 21:32:56 -0500 Subject: [PATCH 06/10] hopefully fixed --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 2dbce00..f5310d8 100644 --- a/setup.py +++ b/setup.py @@ -187,6 +187,7 @@ def create_symlink(src_path, subdir=None): for subdir in tools_bin_subdirs: if subdir: tdir = os.path.join(tools_bin_srcdir, subdir) + #print 'tdir' + str(tdir) else: tdir = tools_bin_srcdir for fpath in os.listdir(tdir): @@ -201,6 +202,7 @@ def create_symlink(src_path, subdir=None): tar = tarfile.open(fullPath, "r:gz") tar.extractall(searchDir) tar.close() + ginsiDir = os.path.join(tools_bin_srcdir, 'mafft') create_symlink(ginsiDir, "ginsi") setup(**param) From 90bc689d740df2e83042ffbf843ae8eefa24292a Mon Sep 17 
00:00:00 2001 From: Kodi Date: Mon, 14 Aug 2017 21:51:50 -0500 Subject: [PATCH 07/10] message --- setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f5310d8..515afd7 100644 --- a/setup.py +++ b/setup.py @@ -203,6 +203,8 @@ def create_symlink(src_path, subdir=None): tar.extractall(searchDir) tar.close() - ginsiDir = os.path.join(tools_bin_srcdir, 'mafft') - create_symlink(ginsiDir, "ginsi") + mafftDir = os.path.join(tools_bin_srcdir, 'mafft') + ginsiDir = os.path.join(DEST_DIR_ROOT, 'ginsi') + os.symlink(mafftDir, ginsiDir) + setup(**param) From 89eb714f608e02b4486274bfe343e0349ba38154 Mon Sep 17 00:00:00 2001 From: kncllns2 Date: Mon, 14 Aug 2017 22:17:05 -0500 Subject: [PATCH 08/10] Update README.md adding information on the new aligners. --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 42f447a..c77fde4 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,14 @@ If you don't have root access, remove the `sudo` part and instead use `--user` * The `setup.py` script is supposed to install setuptools for you if you don't have it. This sometimes works and sometimes doesn't. If you get and error with a message like ` invalid command 'develop'`, it means that setuptools is not installed. To solve this issue, you can manually install [setup tools](https://pypi.python.org/pypi/setuptools#installation-instructions). For example, on Linux, you can run `curl https://bootstrap.pypa.io/ez_setup.py -o - | sudo python` (but note there are other ways of installing setuptools as well). +6. Pasta now includes additional aligners for Linux users: mafft-ginsi, mafft-homologs, contralign (version 1), and probcons. In order to use mafft-homologs and contralign, the user must set the environment variable CONTRALIGN_DIR=/dir/to/sate-tools-linux. + +A simple step-by-step for this is as following: + a. 
change your directory to sate-tools-linux, type `pwd` in the command line, and copying the output + b. `vim ~/.bashrc`, press i and then type CONTRALIGN_DIR=(paste the copied output/directory) at the bottom of the text file, the press ESC followed by :wq + c. then in the command line type `source ~/.bashrc` + +To use these aligners, add the following to your pasta execution --aligner=NAME_OF_ALIGNER, where NAME_OF_ALIGNER now includes (ginsi, homologs, contralign, and probcons) ### 3. From Virtual Machine (VM) From b80eb195a2c5460e9a289a129074c1e3dddc0f0f Mon Sep 17 00:00:00 2001 From: Kodi Collins <10749644+kodicollins@users.noreply.github.com> Date: Wed, 4 Apr 2018 18:51:25 -0700 Subject: [PATCH 09/10] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c77fde4..37de3c9 100644 --- a/README.md +++ b/README.md @@ -103,10 +103,10 @@ If you don't have root access, remove the `sudo` part and instead use `--user` * The `setup.py` script is supposed to install setuptools for you if you don't have it. This sometimes works and sometimes doesn't. If you get and error with a message like ` invalid command 'develop'`, it means that setuptools is not installed. To solve this issue, you can manually install [setup tools](https://pypi.python.org/pypi/setuptools#installation-instructions). For example, on Linux, you can run `curl https://bootstrap.pypa.io/ez_setup.py -o - | sudo python` (but note there are other ways of installing setuptools as well). -6. Pasta now includes additional aligners for Linux users: mafft-ginsi, mafft-homologs, contralign (version 1), and probcons. In order to use mafft-homologs and contralign, the user must set the environment variable CONTRALIGN_DIR=/dir/to/sate-tools-linux. +6. Pasta now includes additional aligners for Linux and MAC users: mafft-ginsi, mafft-homologs, contralign (version 1), and probcons. 
In order to use mafft-homologs and contralign, the user must set the environment variable CONTRALIGN_DIR=/dir/to/sate-tools-linux. A simple step-by-step for this is as following: - a. change your directory to sate-tools-linux, type `pwd` in the command line, and copying the output + a. change your directory to sate-tools-linux (or sate-tools-mac), type `pwd` in the command line, and copy the output b. `vim ~/.bashrc`, press i and then type CONTRALIGN_DIR=(paste the copied output/directory) at the bottom of the text file, the press ESC followed by :wq c. then in the command line type `source ~/.bashrc` From e0ef70db46f1dc12f04260b404742f35b922bd15 Mon Sep 17 00:00:00 2001 From: Kodi Collins <10749644+kodicollins@users.noreply.github.com> Date: Wed, 4 Apr 2018 19:53:21 -0700 Subject: [PATCH 10/10] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 37de3c9..1aaec4f 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,11 @@ You need to rename thse directories and remove the `-master` part. Finally, those with 32-bit Linux machines need to be aware that the master branch has 64bit binaries. 32-bit binaries are provided in the `32bit` branch of `sate-tools-linux` git project (so download [this zip file](https://github.com/smirarab/sate-tools-linux/archive/32bit.zip) instead). +*If you want to use MAFFT-Homologs within PASTA* +`cd sate-tools-linux` or `cd sate-tools-mac` +Use `git clone https://github.com/kodicollins/pasta-databases.git` or download the repository directly from `https://github.com/kodicollins/pasta-databases` +*Be sure to leave this directory (`cd ..`) before starting the next step* + 4. `cd pasta` (or `cd pasta-master` if you used the zip file instead of clonning the git repository) 5. Then run: