qiita-spots · josenavas · Mar 30, 2016 · Mar 29, 2016 · Mar 29, 2016 · Mar 29, 2016
diff --git a/qiita_db/analysis.py b/qiita_db/analysis.py
@@ -507,6 +507,29 @@ def mapping_file(self):
             _, base_fp = qdb.util.get_mountpoint(self._table)[0]
             return join(base_fp, mapping_fp[0][0])
 
+    @property
+    def tgz(self):
+        """Returns the tgz file of the analysis
+
+        Returns
+        -------
+        str or None
+            full filepath to the mapping file or None if not generated
+        """
+        with qdb.sql_connection.TRN:
+            fptypeid = qdb.util.convert_to_id("tgz", "filepath_type")
+            sql = """SELECT filepath
+                     FROM qiita.filepath
+                        JOIN qiita.analysis_filepath USING (filepath_id)
+                     WHERE analysis_id = %s AND filepath_type_id = %s"""
+            qdb.sql_connection.TRN.add(sql, [self._id, fptypeid])
+            tgz_fp = qdb.sql_connection.TRN.execute_fetchindex()
+            if not tgz_fp:
+                return None
+
+            _, base_fp = qdb.util.get_mountpoint(self._table)[0]
+            return join(base_fp, tgz_fp[0][0])
+
     @property
     def step(self):
         """Returns the current step of the analysis

diff --git a/qiita_db/support_files/patches/41.sql b/qiita_db/support_files/patches/41.sql
@@ -0,0 +1,3 @@
+-- Mar 28, 2016
+
+INSERT INTO qiita.filepath_type (filepath_type) VALUES ('tgz')
diff --git a/qiita_db/support_files/patches/python_patches/41.py b/qiita_db/support_files/patches/python_patches/41.py
@@ -0,0 +1,109 @@
+
+from subprocess import Popen, PIPE
+from os.path import exists, join
+
+from qiita_db.sql_connection import TRN
+from qiita_db.artifact import Artifact
+from qiita_db.util import (insert_filepaths, convert_to_id, get_mountpoint,
+                           get_mountpoint_path_by_id)
+
+
+tgz_id = convert_to_id("tgz", "filepath_type")
+_, analysis_mp = get_mountpoint('analysis')[0]
+
+with TRN:
+    #
+    # Generating compressed files for picking failures -- artifact_type = BIOM
+    #
+    sql = """SELECT artifact_id FROM qiita.artifact
+                JOIN qiita.artifact_type USING (artifact_type_id)
+                WHERE artifact_type = 'BIOM'"""
+    TRN.add(sql)
+
+    for r in TRN.execute_fetchindex():
+        to_tgz = None
+        a = Artifact(r[0])
+        for _, fp, fp_type in a.filepaths:
+            if fp_type == 'directory':
+                # removing / from the path if it exists
+                to_tgz = fp[:-1] if fp[-1] == '/' else fp
+                break
+
+        if to_tgz is None:
+            continue
+
+        tgz = to_tgz + '.tgz'
+        if not exists(tgz):
+            cmd = 'tar zcf %s %s' % (tgz, to_tgz)
+            proc = Popen(cmd, universal_newlines=True, shell=True, stdout=PIPE,
+                         stderr=PIPE)
+            stdout, stderr = proc.communicate()
+            return_value = proc.returncode
+            if return_value != 0:
+                raise ValueError(
+                    "There was an error:\nstdout:\n%s\n\nstderr:\n%s"
+                    % (stdout, stderr))
+
+        a_id = a.id
+        # Add the new tgz file to the artifact.
+        fp_ids = insert_filepaths([(tgz, tgz_id)], a_id, a.artifact_type,
+                                  "filepath", move_files=False)
+        sql = """INSERT INTO qiita.artifact_filepath
+                    (artifact_id, filepath_id)
+                 VALUES (%s, %s)"""
+        sql_args = [[a_id, fp_id] for fp_id in fp_ids]
+        TRN.add(sql, sql_args, many=True)
+        TRN.execute()
+
+    #
+    # Generating compressed files for analysis
+    #
+    TRN.add("SELECT analysis_id FROM qiita.analysis")
+    for result in TRN.execute_fetchindex():
+        analysis_id = result[0]
+        # retrieving all analysis filepaths, we could have used
+        # Analysis.all_associated_filepath_ids but we could run into the
+        # analysis not belonging to the current portal, thus using SQL
+
+        sql = """SELECT filepath, data_directory_id
+                 FROM qiita.filepath
+                    JOIN qiita.analysis_filepath USING (filepath_id)
+                 WHERE analysis_id = %s"""
+        TRN.add(sql, [analysis_id])
+        fps = set([tuple(r) for r in TRN.execute_fetchindex()])
+        sql = """SELECT filepath, data_directory_id
+                 FROM qiita.analysis_job
+                    JOIN qiita.job USING (job_id)
+                    JOIN qiita.job_results_filepath USING (job_id)
+                    JOIN qiita.filepath USING (filepath_id)
+                 WHERE analysis_id = %s"""
+        TRN.add(sql, [analysis_id])
+        fps = fps.union([tuple(r) for r in TRN.execute_fetchindex()])
+
+        # no filepaths in the analysis
+        if not fps:
+            continue
+
+        tgz = join(analysis_mp, '%d_files.tgz' % analysis_id)
+        if not exists(tgz):
+            full_fps = [join(get_mountpoint_path_by_id(mid), f)
+                        for f, mid in fps]
+            cmd = 'tar zcf %s %s' % (tgz, ' '.join(full_fps))
+            proc = Popen(cmd, universal_newlines=True, shell=True, stdout=PIPE,
+                         stderr=PIPE)
+            stdout, stderr = proc.communicate()
+            return_value = proc.returncode
+            if return_value != 0:
+                raise ValueError(
+                    "There was an error:\nstdout:\n%s\n\nstderr:\n%s"
+                    % (stdout, stderr))
+
+        # Add the new tgz file to the analysis.
+        fp_ids = insert_filepaths([(tgz, tgz_id)], analysis_id, 'analysis',
+                                  "filepath", move_files=False)
+        sql = """INSERT INTO qiita.analysis_filepath
+                    (analysis_id, filepath_id)
+                 VALUES (%s, %s)"""
+        sql_args = [[analysis_id, fp_id] for fp_id in fp_ids]
+        TRN.add(sql, sql_args, many=True)
+        TRN.execute()
diff --git a/qiita_db/test/test_analysis.py b/qiita_db/test/test_analysis.py
@@ -455,6 +455,19 @@ def test_retrieve_mapping_file_none(self):
         obs = new.mapping_file
         self.assertEqual(obs, None)
 
+    def test_retrieve_tgz(self):
+        # generating here as the tgz is only generated once the analysis runs
+        # to completion (un)successfully
+        analysis = qdb.analysis.Analysis(1)
+        fp = self.get_fp('test.tgz')
+        with open(fp, 'w') as f:
+            f.write('')
+        analysis._add_file(fp, 'tgz')
+        self.assertEqual(self.analysis.tgz, fp)
+
+    def test_retrieve_tgz_none(self):
+        self.assertIsNone(self.analysis.tgz)
+
     # def test_get_parent(self):
     #     raise NotImplementedError()
 

diff --git a/qiita_db/test/test_setup.py b/qiita_db/test/test_setup.py
@@ -39,7 +39,7 @@ def test_filepath(self):
         self.assertEqual(get_count("qiita.filepath"), 19)
 
     def test_filepath_type(self):
-        self.assertEqual(get_count("qiita.filepath_type"), 20)
+        self.assertEqual(get_count("qiita.filepath_type"), 21)
 
     def test_study_prep_template(self):
         self.assertEqual(get_count("qiita.study_prep_template"), 1)

diff --git a/qiita_pet/handlers/analysis_handlers.py b/qiita_pet/handlers/analysis_handlers.py
@@ -225,20 +225,29 @@ def get(self):
         dlop = partial(download_link_or_path, is_local_request)
         mappings = {}
         bioms = {}
+        tgzs = {}
         for analysis in analyses:
             _id = analysis.id
+            # getting mapping file
             mapping = analysis.mapping_file
             if mapping is not None:
                 mappings[_id] = dlop(mapping, gfi(mapping), 'mapping file')
             else:
                 mappings[_id] = ''
+            # getting biom tables
             links = [dlop(f, gfi(f), l)
                      for l, f in viewitems(analysis.biom_tables)]
             bioms[_id] = '\n'.join(links)
+            # getting tgz file
+            tgz = analysis.tgz
+            if tgz is not None:
+                tgzs[_id] = dlop(tgz, gfi(tgz), 'tgz file')
+            else:
+                tgzs[_id] = ''
 
         self.render("show_analyses.html", analyses=analyses, message=message,
                     level=level, is_local_request=is_local_request,
-                    mappings=mappings, bioms=bioms)
+                    mappings=mappings, bioms=bioms, tgzs=tgzs)
 
     @authenticated
     @execute_as_transaction

diff --git a/qiita_pet/templates/show_analyses.html b/qiita_pet/templates/show_analyses.html
@@ -26,6 +26,7 @@ <h3><a href="/study/list/">Create an analysis</a></h3>
       <th>Timestamp</th>
       <th>Mapping File</th>
       <th>Biom Files</th>
+      <th>tgz Files</th>
       <th>Delete?</th>
     </tr>
     {% for analysis in analyses %}
@@ -68,6 +69,9 @@ <h3><a href="/study/list/">Create an analysis</a></h3>
         <td>
           {% raw bioms[_id] %}
         </td>
+        <td>
+          {% raw tgzs[_id] %}
+        </td>
         <td>
          <a class="btn btn-danger glyphicon glyphicon-trash {% if status == 'running' %} disabled {% end %}" onclick="delete_analysis('{{analysis.name}}', {{analysis.id}});"></a>
         </td>

diff --git a/qiita_plugins/target_gene/tgp/pick_otus.py b/qiita_plugins/target_gene/tgp/pick_otus.py
@@ -88,6 +88,26 @@ def generate_pick_closed_reference_otus_cmd(filepaths, out_dir, parameters,
     return cmd, output_dir
 
 
+def generate_sortmerna_tgz(out_dir):
+    """Generates the sortmerna failures tgz command
+
+    Parameters
+    ----------
+    out_dir : str
+        The job output directory
+
+    Returns
+    -------
+    str
+        The sortmerna failures tgz command
+    """
+    to_tgz = join(out_dir, 'sortmerna_picked_otus')
+    tgz = to_tgz + '.tgz'
+    cmd = 'tar zcf %s %s' % (tgz, to_tgz)
+
+    return cmd
+
+
 def generate_artifact_info(pick_out):
     """Creates the artifact information to attach to the payload
 
@@ -104,6 +124,7 @@ def generate_artifact_info(pick_out):
     path_builder = partial(join, pick_out)
     filepaths = [(path_builder('otu_table.biom'), 'biom'),
                  (path_builder('sortmerna_picked_otus'), 'directory'),
+                 (path_builder('sortmerna_picked_otus.tgz'), 'tgz'),
                  (glob(path_builder('log_*.txt'))[0], 'log')]
     return [['OTU table', 'BIOM', filepaths]]
 
@@ -132,7 +153,7 @@ def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
     ValueError
         If there is any error gathering the information from the server
     """
-    qclient.update_job_step(job_id, "Step 1 of 3: Collecting information")
+    qclient.update_job_step(job_id, "Step 1 of 4: Collecting information")
     artifact_id = parameters['input_data']
     fps_info = qclient.get("/qiita_db/artifacts/%s/filepaths/" % artifact_id)
     if not fps_info or not fps_info['success']:
@@ -155,17 +176,25 @@ def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
         raise ValueError(error_msg)
     reference_fps = ref_info['filepaths']
 
-    qclient.update_job_step(job_id, "Step 2 of 3: Generating command")
+    qclient.update_job_step(job_id, "Step 2 of 4: Generating command")
     command, pick_out = generate_pick_closed_reference_otus_cmd(
         fps, out_dir, parameters, reference_fps)
 
-    qclient.update_job_step(job_id, "Step 3 of 3: Executing OTU picking")
+    qclient.update_job_step(job_id, "Step 3 of 4: Executing OTU picking")
     std_out, std_err, return_value = system_call(command)
     if return_value != 0:
         error_msg = ("Error running OTU picking:\nStd out: %s\nStd err: %s"
                      % (std_out, std_err))
         return format_payload(False, error_msg=error_msg)
 
+    qclient.update_job_step(job_id, "Step 4 of 4: tgz sortmerna folder")
+    command = generate_sortmerna_tgz(pick_out)
+    std_out, std_err, return_value = system_call(command)
+    if return_value != 0:
+        error_msg = ("Error while tgz failures:\nStd out: %s\nStd err: %s"
+                     % (std_out, std_err))
+        return format_payload(False, error_msg=error_msg)
+
     artifacts_info = generate_artifact_info(pick_out)
 
     return format_payload(True, artifacts_info=artifacts_info)
diff --git a/qiita_plugins/target_gene/tgp/tests/test_pick_otus.py b/qiita_plugins/target_gene/tgp/tests/test_pick_otus.py
@@ -13,7 +13,8 @@
 from tempfile import mkstemp, mkdtemp
 
 from tgp.pick_otus import (write_parameters_file, generate_artifact_info,
-                           generate_pick_closed_reference_otus_cmd)
+                           generate_pick_closed_reference_otus_cmd,
+                           generate_sortmerna_tgz)
 
 
 class PickOTUsTests(TestCase):
@@ -66,6 +67,14 @@ def test_generate_pick_closed_reference_otus_cmd(self):
         self.assertEqual(obs, exp)
         self.assertEqual(obs_dir, join(output_dir, 'cr_otus'))
 
+    def test_generate_sortmerna_tgz(self):
+        outdir = mkdtemp()
+        self._clean_up_files.append(outdir)
+        obs = generate_sortmerna_tgz(outdir)
+        exp = ("tar zcf %s/sortmerna_picked_otus.tgz "
+               "%s/sortmerna_picked_otus" % (outdir, outdir))
+        self.assertEqual(obs, exp)
+
     def test_generate_pick_closed_reference_otus_cmd_valueerror(self):
         filepaths = [('/directory/seqs.log', 'log'),
                      ('/directory/seqs.demux', 'preprocessed_demux')]
@@ -93,6 +102,7 @@ def test_generate_artifact_info(self):
         obs = generate_artifact_info(outdir)
         fps = [(join(outdir, "otu_table.biom"), "biom"),
                (join(outdir, "sortmerna_picked_otus"), "directory"),
+               (join(outdir, "sortmerna_picked_otus.tgz"), "tgz"),
                (log_fp, "log")]
         exp = [['OTU table', 'BIOM', fps]]
         self.assertEqual(obs, exp)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		-- Mar 28, 2016

		INSERT INTO qiita.filepath_type (filepath_type) VALUES ('tgz')