Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions qiita_db/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,29 @@ def mapping_file(self):
_, base_fp = qdb.util.get_mountpoint(self._table)[0]
return join(base_fp, mapping_fp[0][0])

@property
def tgz(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use the function retrieve_filepaths from here ? This way it will look up the mountpoint correctly. This will fail, for example, in the test environment.

"""Returns the tgz file of the analysis

Returns
-------
str or None
full filepath to the mapping file or None if not generated
"""
with qdb.sql_connection.TRN:
fptypeid = qdb.util.convert_to_id("tgz", "filepath_type")
sql = """SELECT filepath
FROM qiita.filepath
JOIN qiita.analysis_filepath USING (filepath_id)
WHERE analysis_id = %s AND filepath_type_id = %s"""
qdb.sql_connection.TRN.add(sql, [self._id, fptypeid])
tgz_fp = qdb.sql_connection.TRN.execute_fetchindex()
if not tgz_fp:
return None

_, base_fp = qdb.util.get_mountpoint(self._table)[0]
return join(base_fp, tgz_fp[0][0])

@property
def step(self):
"""Returns the current step of the analysis
Expand Down
3 changes: 3 additions & 0 deletions qiita_db/support_files/patches/41.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Mar 28, 2016

INSERT INTO qiita.filepath_type (filepath_type) VALUES ('tgz')
109 changes: 109 additions & 0 deletions qiita_db/support_files/patches/python_patches/41.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@

from subprocess import Popen, PIPE
from os.path import exists, join

from qiita_db.sql_connection import TRN
from qiita_db.artifact import Artifact
from qiita_db.util import (insert_filepaths, convert_to_id, get_mountpoint,
get_mountpoint_path_by_id)


tgz_id = convert_to_id("tgz", "filepath_type")
_, analysis_mp = get_mountpoint('analysis')[0]

with TRN:
#
# Generating compressed files for picking failures -- artifact_type = BIOM
#
sql = """SELECT artifact_id FROM qiita.artifact
JOIN qiita.artifact_type USING (artifact_type_id)
WHERE artifact_type = 'BIOM'"""
TRN.add(sql)

for r in TRN.execute_fetchindex():
to_tgz = None
a = Artifact(r[0])
for _, fp, fp_type in a.filepaths:
if fp_type == 'directory':
# removing / from the path if it exists
to_tgz = fp[:-1] if fp[-1] == '/' else fp
break

if to_tgz is None:
continue

tgz = to_tgz + '.tgz'
if not exists(tgz):
cmd = 'tar zcf %s %s' % (tgz, to_tgz)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any benefit to use a subprocess instead of the tarfile module?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so ... perhaps not loading into memory the files and let the system deal with that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that a known issue? It just seems unnecessary to use a subprocess, since there's already a module to do this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No that I know of ... changing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

proc = Popen(cmd, universal_newlines=True, shell=True, stdout=PIPE,
stderr=PIPE)
stdout, stderr = proc.communicate()
return_value = proc.returncode
if return_value != 0:
raise ValueError(
"There was an error:\nstdout:\n%s\n\nstderr:\n%s"
% (stdout, stderr))

a_id = a.id
# Add the new tgz file to the artifact.
fp_ids = insert_filepaths([(tgz, tgz_id)], a_id, a.artifact_type,
"filepath", move_files=False)
sql = """INSERT INTO qiita.artifact_filepath
(artifact_id, filepath_id)
VALUES (%s, %s)"""
sql_args = [[a_id, fp_id] for fp_id in fp_ids]
TRN.add(sql, sql_args, many=True)
TRN.execute()

#
# Generating compressed files for analysis
#
TRN.add("SELECT analysis_id FROM qiita.analysis")
for result in TRN.execute_fetchindex():
analysis_id = result[0]
# retrieving all analysis filepaths, we could have used
# Analysis.all_associated_filepath_ids but we could run into the
# analysis not belonging to the current portal, thus using SQL

sql = """SELECT filepath, data_directory_id
FROM qiita.filepath
JOIN qiita.analysis_filepath USING (filepath_id)
WHERE analysis_id = %s"""
TRN.add(sql, [analysis_id])
fps = set([tuple(r) for r in TRN.execute_fetchindex()])
sql = """SELECT filepath, data_directory_id
FROM qiita.analysis_job
JOIN qiita.job USING (job_id)
JOIN qiita.job_results_filepath USING (job_id)
JOIN qiita.filepath USING (filepath_id)
WHERE analysis_id = %s"""
TRN.add(sql, [analysis_id])
fps = fps.union([tuple(r) for r in TRN.execute_fetchindex()])

# no filepaths in the analysis
if not fps:
continue

tgz = join(analysis_mp, '%d_files.tgz' % analysis_id)
if not exists(tgz):
full_fps = [join(get_mountpoint_path_by_id(mid), f)
for f, mid in fps]
cmd = 'tar zcf %s %s' % (tgz, ' '.join(full_fps))
proc = Popen(cmd, universal_newlines=True, shell=True, stdout=PIPE,
stderr=PIPE)
stdout, stderr = proc.communicate()
return_value = proc.returncode
if return_value != 0:
raise ValueError(
"There was an error:\nstdout:\n%s\n\nstderr:\n%s"
% (stdout, stderr))

# Add the new tgz file to the analysis.
fp_ids = insert_filepaths([(tgz, tgz_id)], analysis_id, 'analysis',
"filepath", move_files=False)
sql = """INSERT INTO qiita.analysis_filepath
(analysis_id, filepath_id)
VALUES (%s, %s)"""
sql_args = [[analysis_id, fp_id] for fp_id in fp_ids]
TRN.add(sql, sql_args, many=True)
TRN.execute()
13 changes: 13 additions & 0 deletions qiita_db/test/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,19 @@ def test_retrieve_mapping_file_none(self):
obs = new.mapping_file
self.assertEqual(obs, None)

def test_retrieve_tgz(self):
# generating here as the tgz is only generated once the analysis runs
# to completion (un)successfully
analysis = qdb.analysis.Analysis(1)
fp = self.get_fp('test.tgz')
with open(fp, 'w') as f:
f.write('')
analysis._add_file(fp, 'tgz')
self.assertEqual(self.analysis.tgz, fp)

def test_retrieve_tgz_none(self):
self.assertIsNone(self.analysis.tgz)

# def test_get_parent(self):
# raise NotImplementedError()

Expand Down
2 changes: 1 addition & 1 deletion qiita_db/test/test_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_filepath(self):
self.assertEqual(get_count("qiita.filepath"), 19)

def test_filepath_type(self):
self.assertEqual(get_count("qiita.filepath_type"), 20)
self.assertEqual(get_count("qiita.filepath_type"), 21)

def test_study_prep_template(self):
self.assertEqual(get_count("qiita.study_prep_template"), 1)
Expand Down
11 changes: 10 additions & 1 deletion qiita_pet/handlers/analysis_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,20 +225,29 @@ def get(self):
dlop = partial(download_link_or_path, is_local_request)
mappings = {}
bioms = {}
tgzs = {}
for analysis in analyses:
_id = analysis.id
# getting mapping file
mapping = analysis.mapping_file
if mapping is not None:
mappings[_id] = dlop(mapping, gfi(mapping), 'mapping file')
else:
mappings[_id] = ''
# getting biom tables
links = [dlop(f, gfi(f), l)
for l, f in viewitems(analysis.biom_tables)]
bioms[_id] = '\n'.join(links)
# getting tgz file
tgz = analysis.tgz
if tgz is not None:
tgzs[_id] = dlop(tgz, gfi(tgz), 'tgz file')
else:
tgzs[_id] = ''

self.render("show_analyses.html", analyses=analyses, message=message,
level=level, is_local_request=is_local_request,
mappings=mappings, bioms=bioms)
mappings=mappings, bioms=bioms, tgzs=tgzs)

@authenticated
@execute_as_transaction
Expand Down
4 changes: 4 additions & 0 deletions qiita_pet/templates/show_analyses.html
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ <h3><a href="/study/list/">Create an analysis</a></h3>
<th>Timestamp</th>
<th>Mapping File</th>
<th>Biom Files</th>
<th>tgz Files</th>
<th>Delete?</th>
</tr>
{% for analysis in analyses %}
Expand Down Expand Up @@ -68,6 +69,9 @@ <h3><a href="/study/list/">Create an analysis</a></h3>
<td>
{% raw bioms[_id] %}
</td>
<td>
{% raw tgzs[_id] %}
</td>
<td>
<a class="btn btn-danger glyphicon glyphicon-trash {% if status == 'running' %} disabled {% end %}" onclick="delete_analysis('{{analysis.name}}', {{analysis.id}});"></a>
</td>
Expand Down
35 changes: 32 additions & 3 deletions qiita_plugins/target_gene/tgp/pick_otus.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,26 @@ def generate_pick_closed_reference_otus_cmd(filepaths, out_dir, parameters,
return cmd, output_dir


def generate_sortmerna_tgz(out_dir):
"""Generates the sortmerna failures tgz command

Parameters
----------
out_dir : str
The job output directory

Returns
-------
str
The sortmerna failures tgz command
"""
to_tgz = join(out_dir, 'sortmerna_picked_otus')
tgz = to_tgz + '.tgz'
cmd = 'tar zcf %s %s' % (tgz, to_tgz)

return cmd


def generate_artifact_info(pick_out):
"""Creates the artifact information to attach to the payload

Expand All @@ -104,6 +124,7 @@ def generate_artifact_info(pick_out):
path_builder = partial(join, pick_out)
filepaths = [(path_builder('otu_table.biom'), 'biom'),
(path_builder('sortmerna_picked_otus'), 'directory'),
(path_builder('sortmerna_picked_otus.tgz'), 'tgz'),
(glob(path_builder('log_*.txt'))[0], 'log')]
return [['OTU table', 'BIOM', filepaths]]

Expand Down Expand Up @@ -132,7 +153,7 @@ def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
ValueError
If there is any error gathering the information from the server
"""
qclient.update_job_step(job_id, "Step 1 of 3: Collecting information")
qclient.update_job_step(job_id, "Step 1 of 4: Collecting information")
artifact_id = parameters['input_data']
fps_info = qclient.get("/qiita_db/artifacts/%s/filepaths/" % artifact_id)
if not fps_info or not fps_info['success']:
Expand All @@ -155,17 +176,25 @@ def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
raise ValueError(error_msg)
reference_fps = ref_info['filepaths']

qclient.update_job_step(job_id, "Step 2 of 3: Generating command")
qclient.update_job_step(job_id, "Step 2 of 4: Generating command")
command, pick_out = generate_pick_closed_reference_otus_cmd(
fps, out_dir, parameters, reference_fps)

qclient.update_job_step(job_id, "Step 3 of 3: Executing OTU picking")
qclient.update_job_step(job_id, "Step 3 of 4: Executing OTU picking")
std_out, std_err, return_value = system_call(command)
if return_value != 0:
error_msg = ("Error running OTU picking:\nStd out: %s\nStd err: %s"
% (std_out, std_err))
return format_payload(False, error_msg=error_msg)

qclient.update_job_step(job_id, "Step 4 of 4: tgz sortmerna folder")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Step 4 of 4: tgz sortmerna folder" -> "Step 4 of 4: Generating tgz sortmerna folder"

command = generate_sortmerna_tgz(pick_out)
std_out, std_err, return_value = system_call(command)
if return_value != 0:
error_msg = ("Error while tgz failures:\nStd out: %s\nStd err: %s"
% (std_out, std_err))
return format_payload(False, error_msg=error_msg)

artifacts_info = generate_artifact_info(pick_out)

return format_payload(True, artifacts_info=artifacts_info)
12 changes: 11 additions & 1 deletion qiita_plugins/target_gene/tgp/tests/test_pick_otus.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
from tempfile import mkstemp, mkdtemp

from tgp.pick_otus import (write_parameters_file, generate_artifact_info,
generate_pick_closed_reference_otus_cmd)
generate_pick_closed_reference_otus_cmd,
generate_sortmerna_tgz)


class PickOTUsTests(TestCase):
Expand Down Expand Up @@ -66,6 +67,14 @@ def test_generate_pick_closed_reference_otus_cmd(self):
self.assertEqual(obs, exp)
self.assertEqual(obs_dir, join(output_dir, 'cr_otus'))

def test_generate_sortmerna_tgz(self):
outdir = mkdtemp()
self._clean_up_files.append(outdir)
obs = generate_sortmerna_tgz(outdir)
exp = ("tar zcf %s/sortmerna_picked_otus.tgz "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should never hit the situation where there's spaces in the filepaths right? or if we could maybe worth enclosing the paths between quotes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the files are created by the system and all our filenames are only alphanumeric. If you feel strong about it I can add quotes.

"%s/sortmerna_picked_otus" % (outdir, outdir))
self.assertEqual(obs, exp)

def test_generate_pick_closed_reference_otus_cmd_valueerror(self):
filepaths = [('/directory/seqs.log', 'log'),
('/directory/seqs.demux', 'preprocessed_demux')]
Expand Down Expand Up @@ -93,6 +102,7 @@ def test_generate_artifact_info(self):
obs = generate_artifact_info(outdir)
fps = [(join(outdir, "otu_table.biom"), "biom"),
(join(outdir, "sortmerna_picked_otus"), "directory"),
(join(outdir, "sortmerna_picked_otus.tgz"), "tgz"),
(log_fp, "log")]
exp = [['OTU table', 'BIOM', fps]]
self.assertEqual(obs, exp)
Expand Down
Loading