qiita-spots · josenavas · Mar 30, 2016 · Mar 29, 2016 · Mar 29, 2016 · Mar 29, 2016
diff --git a/qiita_db/analysis.py b/qiita_db/analysis.py
@@ -19,6 +19,7 @@
 from __future__ import division
 from itertools import product
 from os.path import join
+from subprocess import Popen, PIPE
 
 from future.utils import viewitems
 from biom import load_table
@@ -467,22 +468,23 @@ def biom_tables(self):
         dict
             Dictonary in the form {data_type: full BIOM filepath}
         """
-        with qdb.sql_connection.TRN:
-            fptypeid = qdb.util.convert_to_id("biom", "filepath_type")
-            sql = """SELECT data_type, filepath
-                     FROM qiita.filepath
-                        JOIN qiita.analysis_filepath USING (filepath_id)
-                        JOIN qiita.data_type USING (data_type_id)
-                     WHERE analysis_id = %s AND filepath_type_id = %s"""
-            qdb.sql_connection.TRN.add(sql, [self._id, fptypeid])
-            tables = qdb.sql_connection.TRN.execute_fetchindex()
-            if not tables:
-                return {}
-            ret_tables = {}
-            _, base_fp = qdb.util.get_mountpoint(self._table)[0]
-            for fp in tables:
-                ret_tables[fp[0]] = join(base_fp, fp[1])
-            return ret_tables
+        fps = [(_id, fp) for _id, fp, ftype in qdb.util.retrieve_filepaths(
+            "analysis_filepath", "analysis_id", self._id)
+            if ftype == 'biom']
+
+        if fps:
+            fps_ids = [f[0] for f in fps]
+            with qdb.sql_connection.TRN:
+                sql = """SELECT filepath_id, data_type FROM qiita.filepath
+                            JOIN qiita.analysis_filepath USING (filepath_id)
+                            JOIN qiita.data_type USING (data_type_id)
+                            WHERE filepath_id IN %s"""
+                qdb.sql_connection.TRN.add(sql, [tuple(fps_ids)])
+                data_types = dict(qdb.sql_connection.TRN.execute_fetchindex())
+
+            return {data_types[_id]: f for _id, f in fps}
+        else:
+            return {}
 
     @property
     def mapping_file(self):
@@ -493,19 +495,34 @@ def mapping_file(self):
         str or None
             full filepath to the mapping file or None if not generated
         """
-        with qdb.sql_connection.TRN:
-            fptypeid = qdb.util.convert_to_id("plain_text", "filepath_type")
-            sql = """SELECT filepath
-                     FROM qiita.filepath
-                        JOIN qiita.analysis_filepath USING (filepath_id)
-                     WHERE analysis_id = %s AND filepath_type_id = %s"""
-            qdb.sql_connection.TRN.add(sql, [self._id, fptypeid])
-            mapping_fp = qdb.sql_connection.TRN.execute_fetchindex()
-            if not mapping_fp:
-                return None
+        fp = [fp for _, fp, fp_type in qdb.util.retrieve_filepaths(
+            "analysis_filepath", "analysis_id", self._id)
+            if fp_type == 'plain_text']
 
-            _, base_fp = qdb.util.get_mountpoint(self._table)[0]
-            return join(base_fp, mapping_fp[0][0])
+        if fp:
+            # returning the actual path vs. an array
+            return fp[0]
+        else:
+            return None
+
+    @property
+    def tgz(self):
+        """Returns the tgz file of the analysis
+
+        Returns
+        -------
+        str or None
+            full filepath to the tgz file or None if not generated
+        """
+        fp = [fp for _, fp, fp_type in qdb.util.retrieve_filepaths(
+            "analysis_filepath", "analysis_id", self._id)
+            if fp_type == 'tgz']
+
+        if fp:
+            # returning the first matching path rather than the whole list
+            return fp[0]
+        else:
+            return None
 
     @property
     def step(self):
@@ -768,6 +785,46 @@ def remove_samples(self, artifacts=None, samples=None):
             qdb.sql_connection.TRN.add(sql, args, many=True)
             qdb.sql_connection.TRN.execute()
 
+    def generate_tgz(self):
+        """Generates the tgz file with all the files of the analysis
+
+        The archive is written to the 'analysis' mountpoint as
+        <analysis id>_files.tgz and, only if tar exits with 0, it is
+        registered through self._add_file with the 'tgz' filepath type.
+
+        Returns
+        -------
+        tuple of (str, str, int)
+            The stdout, stderr and return value of the tar command
+        """
+        fps_ids = self.all_associated_filepath_ids
+        with qdb.sql_connection.TRN:
+            # NOTE(review): with no associated filepaths the IN clause
+            # presumably receives an empty tuple -- confirm callers only
+            # run this on analyses that already have files
+            sql = """SELECT filepath, data_directory_id FROM qiita.filepath
+                        WHERE filepath_id IN %s"""
+            qdb.sql_connection.TRN.add(sql, [tuple(fps_ids)])
+
+            full_fps = [join(qdb.util.get_mountpoint_path_by_id(mid), f)
+                        for f, mid in
+                        qdb.sql_connection.TRN.execute_fetchindex()]
+
+            _, analysis_mp = qdb.util.get_mountpoint('analysis')[0]
+            tgz = join(analysis_mp, '%d_files.tgz' % self.id)
+            # Passing an argument list (no shell) so paths containing spaces
+            # or shell metacharacters reach tar unmodified
+            cmd = ['tar', 'zcf', tgz] + full_fps
+
+            proc = Popen(cmd, universal_newlines=True, stdout=PIPE,
+                         stderr=PIPE)
+            stdout, stderr = proc.communicate()
+            return_value = proc.returncode
+
+            if return_value == 0:
+                self._add_file(tgz, 'tgz')
+
+        return stdout, stderr, return_value
+
     def build_files(self,
                     rarefaction_depth=None,
                     merge_duplicated_sample_ids=False):

diff --git a/qiita_db/support_files/patches/41.sql b/qiita_db/support_files/patches/41.sql
@@ -0,0 +1,3 @@
+-- Mar 28, 2016
+-- Registers 'tgz' as a filepath type so tar+gzip archives can be stored
+INSERT INTO qiita.filepath_type (filepath_type) VALUES ('tgz')
diff --git a/qiita_db/support_files/patches/python_patches/41.py b/qiita_db/support_files/patches/python_patches/41.py
@@ -0,0 +1,115 @@
+# Patch 41: creates tgz files for existing BIOM artifacts and analyses
+
+from subprocess import Popen, PIPE
+from os.path import exists, join
+
+from qiita_db.sql_connection import TRN
+from qiita_db.artifact import Artifact
+from qiita_db.util import (insert_filepaths, convert_to_id, get_mountpoint,
+                           get_mountpoint_path_by_id)
+
+
+tgz_id = convert_to_id("tgz", "filepath_type")
+_, analysis_mp = get_mountpoint('analysis')[0]
+
+with TRN:
+    #
+    # Generating compressed files for picking failures -- artifact_type = BIOM
+    #
+    sql = """SELECT artifact_id FROM qiita.artifact
+                JOIN qiita.artifact_type USING (artifact_type_id)
+                WHERE artifact_type = 'BIOM'"""
+    TRN.add(sql)
+
+    for r in TRN.execute_fetchindex():
+        to_tgz = None
+        a = Artifact(r[0])
+        for _, fp, fp_type in a.filepaths:
+            if fp_type == 'directory':
+                # removing the trailing / from the path if it exists
+                to_tgz = fp[:-1] if fp.endswith('/') else fp
+                break
+
+        # artifacts without a directory have nothing to compress
+        if to_tgz is None:
+            continue
+
+        tgz = to_tgz + '.tgz'
+        # an already existing tgz is reused instead of being regenerated
+        if not exists(tgz):
+            # argument list (no shell) so paths with spaces or shell
+            # metacharacters reach tar unmodified
+            cmd = ['tar', 'zcf', tgz, to_tgz]
+            proc = Popen(cmd, universal_newlines=True, stdout=PIPE,
+                         stderr=PIPE)
+            stdout, stderr = proc.communicate()
+            return_value = proc.returncode
+            if return_value != 0:
+                raise ValueError(
+                    "There was an error:\nstdout:\n%s\n\nstderr:\n%s"
+                    % (stdout, stderr))
+
+        a_id = a.id
+        # Add the new tgz file to the artifact.
+        fp_ids = insert_filepaths([(tgz, tgz_id)], a_id, a.artifact_type,
+                                  "filepath", move_files=False)
+        sql = """INSERT INTO qiita.artifact_filepath
+                    (artifact_id, filepath_id)
+                 VALUES (%s, %s)"""
+        sql_args = [[a_id, fp_id] for fp_id in fp_ids]
+        TRN.add(sql, sql_args, many=True)
+        TRN.execute()
+
+    #
+    # Generating compressed files for analysis
+    #
+    TRN.add("SELECT analysis_id FROM qiita.analysis")
+    for result in TRN.execute_fetchindex():
+        analysis_id = result[0]
+        # retrieving all analysis filepaths, we could have used
+        # Analysis.all_associated_filepath_ids but we could run into the
+        # analysis not belonging to the current portal, thus using SQL
+
+        sql = """SELECT filepath, data_directory_id
+                 FROM qiita.filepath
+                    JOIN qiita.analysis_filepath USING (filepath_id)
+                 WHERE analysis_id = %s"""
+        TRN.add(sql, [analysis_id])
+        fps = set([tuple(r) for r in TRN.execute_fetchindex()])
+        sql = """SELECT filepath, data_directory_id
+                 FROM qiita.analysis_job
+                    JOIN qiita.job USING (job_id)
+                    JOIN qiita.job_results_filepath USING (job_id)
+                    JOIN qiita.filepath USING (filepath_id)
+                 WHERE analysis_id = %s"""
+        TRN.add(sql, [analysis_id])
+        fps = fps.union([tuple(r) for r in TRN.execute_fetchindex()])
+
+        # no filepaths in the analysis
+        if not fps:
+            continue
+
+        tgz = join(analysis_mp, '%d_files.tgz' % analysis_id)
+        if not exists(tgz):
+            full_fps = [join(get_mountpoint_path_by_id(mid), f)
+                        for f, mid in fps]
+            # same argument-list form as for the artifacts above
+            cmd = ['tar', 'zcf', tgz] + full_fps
+            proc = Popen(cmd, universal_newlines=True, stdout=PIPE,
+                         stderr=PIPE)
+            stdout, stderr = proc.communicate()
+            return_value = proc.returncode
+            if return_value != 0:
+                raise ValueError(
+                    "There was an error:\nstdout:\n%s\n\nstderr:\n%s"
+                    % (stdout, stderr))
+
+        # Add the new tgz file to the analysis.
+        fp_ids = insert_filepaths([(tgz, tgz_id)], analysis_id, 'analysis',
+                                  "filepath", move_files=False)
+        sql = """INSERT INTO qiita.analysis_filepath
+                    (analysis_id, filepath_id)
+                 VALUES (%s, %s)"""
+        sql_args = [[analysis_id, fp_id] for fp_id in fp_ids]
+        TRN.add(sql, sql_args, many=True)
+        TRN.execute()
diff --git a/qiita_db/test/test_analysis.py b/qiita_db/test/test_analysis.py
@@ -455,6 +455,26 @@ def test_retrieve_mapping_file_none(self):
         obs = new.mapping_file
         self.assertEqual(obs, None)
 
+    def test_retrieve_tgz(self):
+        # attaching an empty tgz by hand, as the real one is only generated
+        # once the analysis runs to completion (un)successfully
+        analysis = qdb.analysis.Analysis(1)
+        fp = self.get_fp('test.tgz')
+        with open(fp, 'w') as f:
+            f.write('')
+        analysis._add_file(fp, 'tgz')
+        self.assertEqual(analysis.tgz, fp)
+
+    def test_retrieve_tgz_none(self):
+        self.assertIsNone(self.analysis.tgz)  # no tgz is added in this test
+
+    def test_generate_tgz(self):
+        # stderr is deliberately ignored: its content depends on the tar
+        # version installed in the system
+        tar_out, _, tar_status = self.analysis.generate_tgz()
+        self.assertEqual(tar_out, "")
+        self.assertEqual(tar_status, 0)
+
     # def test_get_parent(self):
     #     raise NotImplementedError()
 

diff --git a/qiita_db/test/test_setup.py b/qiita_db/test/test_setup.py
@@ -39,7 +39,7 @@ def test_filepath(self):
         self.assertEqual(get_count("qiita.filepath"), 19)
 
     def test_filepath_type(self):
-        self.assertEqual(get_count("qiita.filepath_type"), 20)
+        self.assertEqual(get_count("qiita.filepath_type"), 21)
 
     def test_study_prep_template(self):
         self.assertEqual(get_count("qiita.study_prep_template"), 1)

diff --git a/qiita_pet/handlers/analysis_handlers.py b/qiita_pet/handlers/analysis_handlers.py
@@ -225,20 +225,29 @@ def get(self):
         dlop = partial(download_link_or_path, is_local_request)
         mappings = {}
         bioms = {}
+        tgzs = {}
         for analysis in analyses:
             _id = analysis.id
+            # getting mapping file
             mapping = analysis.mapping_file
             if mapping is not None:
                 mappings[_id] = dlop(mapping, gfi(mapping), 'mapping file')
             else:
                 mappings[_id] = ''
+            # getting biom tables
             links = [dlop(f, gfi(f), l)
                      for l, f in viewitems(analysis.biom_tables)]
             bioms[_id] = '\n'.join(links)
+            # getting tgz file
+            tgz = analysis.tgz
+            if tgz is not None:
+                tgzs[_id] = dlop(tgz, gfi(tgz), 'tgz file')
+            else:
+                tgzs[_id] = ''
 
         self.render("show_analyses.html", analyses=analyses, message=message,
                     level=level, is_local_request=is_local_request,
-                    mappings=mappings, bioms=bioms)
+                    mappings=mappings, bioms=bioms, tgzs=tgzs)
 
     @authenticated
     @execute_as_transaction

diff --git a/qiita_pet/templates/show_analyses.html b/qiita_pet/templates/show_analyses.html
@@ -26,6 +26,7 @@ <h3><a href="/study/list/">Create an analysis</a></h3>
       <th>Timestamp</th>
       <th>Mapping File</th>
       <th>Biom Files</th>
+      <th>tgz Files</th>
       <th>Delete?</th>
     </tr>
     {% for analysis in analyses %}
@@ -68,6 +69,9 @@ <h3><a href="/study/list/">Create an analysis</a></h3>
         <td>
           {% raw bioms[_id] %}
         </td>
+        <td>
+          {% raw tgzs[_id] %}
+        </td>
         <td>
          <a class="btn btn-danger glyphicon glyphicon-trash {% if status == 'running' %} disabled {% end %}" onclick="delete_analysis('{{analysis.name}}', {{analysis.id}});"></a>
         </td>

diff --git a/qiita_plugins/target_gene/tgp/pick_otus.py b/qiita_plugins/target_gene/tgp/pick_otus.py
@@ -88,6 +88,26 @@ def generate_pick_closed_reference_otus_cmd(filepaths, out_dir, parameters,
     return cmd, output_dir
 
 
+def generate_sortmerna_tgz(out_dir):
+    """Builds the command that compresses the sortmerna_picked_otus folder
+
+    Parameters
+    ----------
+    out_dir : str
+        The directory that contains the sortmerna_picked_otus folder
+
+    Returns
+    -------
+    str
+        The tar command creating sortmerna_picked_otus.tgz next to the folder
+    """
+    picked_dir = join(out_dir, 'sortmerna_picked_otus')
+    archive_fp = '%s.tgz' % picked_dir
+    tar_args = ['tar', 'zcf', archive_fp, picked_dir]
+
+    return ' '.join(tar_args)
+
+
 def generate_artifact_info(pick_out):
     """Creates the artifact information to attach to the payload
 
@@ -104,6 +124,7 @@ def generate_artifact_info(pick_out):
     path_builder = partial(join, pick_out)
     filepaths = [(path_builder('otu_table.biom'), 'biom'),
                  (path_builder('sortmerna_picked_otus'), 'directory'),
+                 (path_builder('sortmerna_picked_otus.tgz'), 'tgz'),
                  (glob(path_builder('log_*.txt'))[0], 'log')]
     return [['OTU table', 'BIOM', filepaths]]
 
@@ -132,7 +153,7 @@ def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
     ValueError
         If there is any error gathering the information from the server
     """
-    qclient.update_job_step(job_id, "Step 1 of 3: Collecting information")
+    qclient.update_job_step(job_id, "Step 1 of 4: Collecting information")
     artifact_id = parameters['input_data']
     fps_info = qclient.get("/qiita_db/artifacts/%s/filepaths/" % artifact_id)
     if not fps_info or not fps_info['success']:
@@ -155,17 +176,26 @@ def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
         raise ValueError(error_msg)
     reference_fps = ref_info['filepaths']
 
-    qclient.update_job_step(job_id, "Step 2 of 3: Generating command")
+    qclient.update_job_step(job_id, "Step 2 of 4: Generating command")
     command, pick_out = generate_pick_closed_reference_otus_cmd(
         fps, out_dir, parameters, reference_fps)
 
-    qclient.update_job_step(job_id, "Step 3 of 3: Executing OTU picking")
+    qclient.update_job_step(job_id, "Step 3 of 4: Executing OTU picking")
     std_out, std_err, return_value = system_call(command)
     if return_value != 0:
         error_msg = ("Error running OTU picking:\nStd out: %s\nStd err: %s"
                      % (std_out, std_err))
         return format_payload(False, error_msg=error_msg)
 
+    qclient.update_job_step(job_id,
+                            "Step 4 of 4: Generating tgz sortmerna folder")
+    command = generate_sortmerna_tgz(pick_out)
+    std_out, std_err, return_value = system_call(command)
+    if return_value != 0:
+        error_msg = ("Error while tgz failures:\nStd out: %s\nStd err: %s"
+                     % (std_out, std_err))
+        return format_payload(False, error_msg=error_msg)
+
     artifacts_info = generate_artifact_info(pick_out)
 
     return format_payload(True, artifacts_info=artifacts_info)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		-- Mar 28, 2016

		INSERT INTO qiita.filepath_type (filepath_type) VALUES ('tgz')