Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions qiita_core/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
from qiita_core.util import (
send_email, qiita_test_checker, execute_as_transaction, get_qiita_version,
is_test_environment, get_release_info)
from qiita_db.meta_util import generate_biom_and_metadata_release
from qiita_db.meta_util import (
generate_biom_and_metadata_release, generate_plugin_releases)
import qiita_db as qdb


Expand Down Expand Up @@ -70,15 +71,16 @@ def test_get_release_info(self):
generate_biom_and_metadata_release('private')
# just checking that is not empty cause the MD5 will change on every
# run
md5sum, filepath, timestamp = get_release_info('private')
self.assertNotEqual(md5sum, '')
self.assertNotEqual(filepath, '')
self.assertNotEqual(timestamp, '')

md5sum, filepath, timestamp = get_release_info('public')
self.assertEqual(md5sum, '')
self.assertEqual(filepath, '')
self.assertEqual(timestamp, '')
biom_metadata_release, archive_release = get_release_info('private')
# note that we are testing not equal as we should have some information
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

eqaul -> equal

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to hash some invariant, like metadata stored inside the BIOM file instead? Perhaps that's enough.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, added a test to check that at least the filename is what we expect ...

# but as the md5 will change, it is pretty hard to test for equality
self.assertNotEqual(biom_metadata_release, ('', '', ''))
self.assertEqual(archive_release, ('', '', ''))

generate_plugin_releases()
biom_metadata_release, archive_release = get_release_info('public')
self.assertEqual(biom_metadata_release, ('', '', ''))
self.assertNotEqual(archive_release, ('', '', ''))


if __name__ == '__main__':
Expand Down
18 changes: 15 additions & 3 deletions qiita_core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def get_qiita_version():


def get_release_info(study_status='public'):
"""Returns the study status release MD5
"""Returns the studies and the archive release details

Parameters
----------
Expand All @@ -155,7 +155,7 @@ def get_release_info(study_status='public'):

Returns
------
str, str, str
((str, str, str), (str, str, str))
The MD5, filepath and timestamp of the BIOM/metadata release, followed by the MD5, filepath and timestamp of the archive release
"""
portal = qiita_config.portal
Expand All @@ -168,5 +168,17 @@ def get_release_info(study_status='public'):
filepath = ''
if timestamp is None:
timestamp = ''
biom_metadata_release = ((md5sum, filepath, timestamp))

return md5sum, filepath, timestamp
md5sum = r_client.get('release-archive:md5sum')
filepath = r_client.get('release-archive:filepath')
timestamp = r_client.get('release-archive:time')
if md5sum is None:
md5sum = ''
if filepath is None:
filepath = ''
if timestamp is None:
timestamp = ''
archive_release = ((md5sum, filepath, timestamp))

return (biom_metadata_release, archive_release)
25 changes: 22 additions & 3 deletions qiita_db/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import qiita_db as qdb


class Archive(object):
class Archive(qdb.base.QiitaObject):
r"""Extra information for any features stored in a BIOM Artifact

Methods
Expand All @@ -26,6 +26,25 @@ class Archive(object):
qiita_db.QiitaObject
"""

@classmethod
def merging_schemes(cls):
    r"""Returns the available merging schemes

    Returns
    -------
    dict
        The rows of qiita.archive_merging_scheme, as
        {archive_merging_scheme_id: archive_merging_scheme}
    """
    with qdb.sql_connection.TRN:
        sql = """SELECT archive_merging_scheme_id, archive_merging_scheme
                 FROM qiita.archive_merging_scheme"""
        qdb.sql_connection.TRN.add(sql)
        # each fetched row is an (id, scheme) pair, so dict() maps id -> scheme
        return dict(qdb.sql_connection.TRN.execute_fetchindex())

@classmethod
def _inserting_main_steps(cls, ms, features):
with qdb.sql_connection.TRN:
Expand Down Expand Up @@ -178,9 +197,9 @@ def retrieve_feature_values(cls, archive_merging_scheme=None,
else:
qdb.sql_connection.TRN.add(sql.format(''))

return {k: v for k, v in
qdb.sql_connection.TRN.execute_fetchindex()}
return dict(qdb.sql_connection.TRN.execute_fetchindex())

@classmethod
def insert_features(self, merging_scheme, features):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hate to sound picky, but if insert_features is going to be a class method, can you change all instances of self to cls, just for convention's sake?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks! Will merge as soon as testing is completed.

r"""Inserts new features to the database based on a given artifact

Expand Down
3 changes: 1 addition & 2 deletions qiita_db/handlers/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,4 @@ def patch(self):

ms = Archive.get_merging_scheme_from_job(ProcessingJob(req_path))

archive = Archive()
self.write(archive.insert_features(ms, loads(req_value)))
self.write(Archive.insert_features(ms, loads(req_value)))
80 changes: 79 additions & 1 deletion qiita_db/meta_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from __future__ import division

from os import stat, makedirs, rename
from os.path import join, relpath, exists
from os.path import join, relpath, exists, basename
from time import strftime, localtime
import matplotlib.pyplot as plt
import matplotlib as mpl
Expand All @@ -36,6 +36,8 @@
from datetime import datetime
from tarfile import open as topen, TarInfo
from hashlib import md5
from re import sub
from json import loads, dump

from qiita_core.qiita_settings import qiita_config, r_client
from qiita_core.configuration_manager import ConfigurationManager
Expand Down Expand Up @@ -432,3 +434,79 @@ def generate_biom_and_metadata_release(study_status='public'):
# important to "flush" variables to avoid errors
r_client.delete(redis_key)
f(redis_key, v)


def generate_plugin_releases():
    """Generate releases for plugins

    For every command of an active software that defines a
    ``post_processing_cmd``, and for every archive merging scheme whose name
    contains the command name, this:

    1. dumps the non-empty archived feature values of the merging scheme to
       ``<working_dir>/releases/archive/<timestamp>/<scheme>/archive.json``
    2. runs the command's post processing script over that file

    The timestamped folder is then tgz-ed to
    ``<working_dir>/releases/archive/archive.tgz`` and the filepath, md5sum
    and creation time of the tgz are stored in redis under the
    ``release-archive:filepath``, ``release-archive:md5sum`` and
    ``release-archive:time`` keys.

    Raises
    ------
    ValueError
        If a post processing command returns a non-zero exit status
    """
    ARCHIVE = qdb.archive.Archive
    qiita_config = ConfigurationManager()
    working_dir = qiita_config.working_dir

    commands = [c for s in qdb.software.Software.iter(active=True)
                for c in s.commands if c.post_processing_cmd is not None]

    # the same timestamp names the release folder and the temporary tgz
    tnow = datetime.now()
    ts = tnow.strftime('%m%d%y-%H%M%S')
    tgz_dir = join(working_dir, 'releases', 'archive')
    if not exists(tgz_dir):
        makedirs(tgz_dir)
    tgz_dir_release = join(tgz_dir, ts)
    if not exists(tgz_dir_release):
        makedirs(tgz_dir_release)
    for cmd in commands:
        cmd_name = cmd.name
        # .values()/.items() behave the same on Python 2 and 3, unlike
        # .iteritems() which only exists on Python 2
        mschemes = [v for v in ARCHIVE.merging_schemes().values()
                    if cmd_name in v]
        for ms in mschemes:
            # keep only alphanumeric characters so the merging scheme can be
            # used as a folder name
            ms_name = sub('[^0-9a-zA-Z]+', '', ms)
            ms_fp = join(tgz_dir_release, ms_name)
            if not exists(ms_fp):
                makedirs(ms_fp)

            pfp = join(ms_fp, 'archive.json')
            # the stored values are JSON strings; features without a value
            # (empty string) are left out of the release
            archives = {k: loads(v)
                        for k, v in ARCHIVE.retrieve_feature_values(
                            archive_merging_scheme=ms).items()
                        if v != ''}
            with open(pfp, 'w') as f:
                dump(archives, f)

            # now let's run the post_processing_cmd
            ppc = cmd.post_processing_cmd

            # concatenate any other parameters into a string
            params = ' '.join(["%s=%s" % (k, v) for k, v in
                               ppc['script_params'].items()])
            # append archives file and output dir parameters
            params = ("%s --fp_archive=%s --output_dir=%s" % (
                params, pfp, ms_fp))

            ppc_cmd = "%s %s %s" % (
                ppc['script_env'], ppc['script_path'], params)
            p_out, p_err, rv = qdb.processing_job._system_call(ppc_cmd)
            p_out = p_out.decode("utf-8").rstrip()
            if rv != 0:
                raise ValueError('Error %d: %s' % (rv, p_out))
            # the parsed value is not used; parsing only makes a non-JSON
            # output of the script fail loudly here
            p_out = loads(p_out)

    # tgz-ing all files
    tgz_name = join(tgz_dir, 'archive-%s-building.tgz' % ts)
    tgz_name_final = join(tgz_dir, 'archive.tgz')
    with topen(tgz_name, "w|gz") as tgz:
        tgz.add(tgz_dir_release, arcname=basename(tgz_dir_release))
    # getting the release md5
    with open(tgz_name, "rb") as f:
        md5sum = md5()
        for c in iter(lambda: f.read(4096), b""):
            md5sum.update(c)
    # the tgz is built under a temporary name and only then moved over the
    # final one
    rename(tgz_name, tgz_name_final)
    # NOTE(review): the stored filepath is tgz_name_final with the
    # working_dir prefix sliced off, so it starts with a path separator
    # unless working_dir ends with one -- confirm against the consumers
    vals = [
        ('filepath', tgz_name_final[len(working_dir):]),
        ('md5sum', md5sum.hexdigest()),
        ('time', tnow.strftime('%m-%d-%y %H:%M:%S'))]
    for k, v in vals:
        redis_key = 'release-archive:%s' % k
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        r_client.set(redis_key, v)
8 changes: 8 additions & 0 deletions qiita_db/test/test_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
@qiita_test_checker()
class ArchiveTest(TestCase):
def test_insert_from_biom_and_retrieve_feature_values(self):
# merging_scheme should be empty
self.assertDictEqual(qdb.archive.Archive.merging_schemes(), dict())

# 1 - to test error as it's FASTQ
with self.assertRaises(ValueError) as err:
qdb.archive.Archive.insert_from_artifact(
Expand Down Expand Up @@ -63,6 +66,11 @@ def test_insert_from_biom_and_retrieve_feature_values(self):
obs = qdb.archive.Archive.retrieve_feature_values('Nothing')
self.assertEqual(obs, exp)

# now merging_schemes should have 3 elements
self.assertDictEqual(qdb.archive.Archive.merging_schemes(), {
1: 'Pick closed-reference OTUs | Split libraries FASTQ',
2: '', 3: 'Single Rarefaction | N/A'})

def test_get_merging_scheme_from_job(self):
exp = 'Split libraries FASTQ | N/A'
obs = qdb.archive.Archive.get_merging_scheme_from_job(
Expand Down
13 changes: 13 additions & 0 deletions qiita_db/test/test_meta_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,19 @@ def test_generate_biom_and_metadata_release(self):
"UPDATE settings SET base_data_dir = '%s'" % obdr)
bdr = qdb.sql_connection.TRN.execute()

def test_generate_plugin_releases(self):
    # build the archive release; its filepath and creation time are then
    # read back from redis
    qdb.meta_util.generate_plugin_releases()

    release_fp = join(qiita_config.working_dir,
                      r_client.get('release-archive:filepath'))
    with topen(release_fp, "r:gz") as tar:
        observed = [member.name for member in tar]

    # the expected folder/file in the tgz should be named as the time
    # when it was created so let's test that; the stored time looks like
    # 'mm-dd-yy HH:MM:SS' and the folder name like 'mmddyy-HHMMSS'
    expected = r_client.get('release-archive:time')
    for old, new in (('-', ''), (':', ''), (' ', '-')):
        expected = expected.replace(old, new)
    self.assertEqual(observed, [expected])


if __name__ == '__main__':
main()
6 changes: 5 additions & 1 deletion qiita_pet/handlers/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,11 @@ def get(self, study_id):
class DownloadRelease(BaseHandlerDownload):
@coroutine
def get(self, extras):
_, relpath, _ = get_release_info()
biom_metadata_release, archive_release = get_release_info()
if extras == 'archive':
relpath = archive_release[1]
else:
relpath = biom_metadata_release[1]

# If we don't have nginx, write a file that indicates this
# Note that this configuration will automatically create and download
Expand Down
17 changes: 14 additions & 3 deletions qiita_pet/templates/sitebase.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
{% set sysmessage = r_client.get('sysmessage') %}
{% set user = current_user %}
{% set qiita_version, qiita_sha = get_qiita_version() %}
{% set public_md5, _, public_timestamp = get_release_info() %}
{% set biom_metadata_release, archive_release = get_release_info() %}

{% set level = globals().get('level', '') %}
{% if level not in {'danger', 'success', 'info', 'warning'} %}
Expand Down Expand Up @@ -490,12 +490,23 @@
<li>
<a href="https://github.com/biocore/qiita/blob/master/README.rst#current-features">Current and Future Features</a>
</li>
<li role="separator" class="divider"></li>
<li>
<a type="button" data-toggle="modal" data-target=".qiita_pet_download_confirm">
Download public BIOM and metadata files
<small>
<br/><b>MD5:</b> {{public_md5}}
<br/><b>Last update:</b> {{public_timestamp}}
<br/><b>MD5:</b> {{biom_metadata_release[0]}}
<br/><b>Last update:</b> {{biom_metadata_release[2]}}
</small>
</a>
</li>
<li role="separator" class="divider"></li>
<li>
<a type="button" href="{% raw qiita_config.portal_dir %}/release/download/archive">
BETA: Download Archive files (for example, deblur trees)
<small>
<br/><b>MD5:</b> {{archive_release[0]}}
<br/><b>Last update:</b> {{archive_release[2]}}
</small>
</a>
</li>
Expand Down
1 change: 1 addition & 0 deletions scripts/all-qiita-cron-job
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ qiita-cron-job generate_biom_and_metadata_release
qiita-cron-job purge_filepaths
qiita-cron-job purge_files_from_filesystem
qiita-cron-job update_redis_stats
qiita-cron-job generate_plugin_releases
8 changes: 7 additions & 1 deletion scripts/qiita-cron-job
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ from qiita_db.util import (
from qiita_db.meta_util import (
update_redis_stats as qiita_update_redis_stats,
generate_biom_and_metadata_release as
qiita_generate_biom_and_metadata_release)
qiita_generate_biom_and_metadata_release,
generate_plugin_releases as qiita_generate_plugin_releases)


@click.group()
Expand Down Expand Up @@ -59,5 +60,10 @@ def generate_biom_and_metadata_release():
qiita_generate_biom_and_metadata_release('public')


@commands.command()
def generate_plugin_releases():
    # CLI entry point: delegates to qiita_db.meta_util.generate_plugin_releases
    # (imported above as qiita_generate_plugin_releases)
    qiita_generate_plugin_releases()


if __name__ == "__main__":
commands()