12 changes: 6 additions & 6 deletions .travis.yml
@@ -3,20 +3,18 @@ language: python
sudo: false
env:
global:
- PYTHON_VERSION=2.7
- PYTHON_VERSION=3.5
Contributor: ?

Member Author: Long story short: there is a bug in conda that the QIIME 2 team reported, and this is the best way to install everything. Note that these changes are already in master and passing.

matrix:
- TEST_ADD_STUDIES=False COVER_PACKAGE=qiita_db
- TEST_ADD_STUDIES=False COVER_PACKAGE=qiita_pet
- TEST_ADD_STUDIES=True COVER_PACKAGE="qiita_core qiita_ware"
before_install:
- redis-server --version
- redis-server --port 7777 &
- wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- wget http://repo.continuum.io/miniconda/Miniconda3-4.3.31-Linux-x86_64.sh -O miniconda.sh
- chmod +x miniconda.sh
- ./miniconda.sh -b
- export PATH=/home/travis/miniconda3/bin:$PATH
# Update conda itself
- conda update --yes conda
# Downloading and setting up ascp for EBI testing
- wget ftp://ftp.microbio.me/pub/qiita/ascp-install-3.5.4.102989-linux-64-qiita.sh -O ascp-install-3.5.4.102989-linux-64-qiita.sh
- chmod +x ascp-install-3.5.4.102989-linux-64-qiita.sh
@@ -27,7 +25,7 @@ before_install:
install:
# install a few of the dependencies that pip would otherwise try to install
# when installing scikit-bio
- travis_retry conda create -q --yes -n qiita python=$PYTHON_VERSION pip nose flake8
- travis_retry conda create -q --yes -n qiita python=2.7 pip nose flake8
pyzmq 'networkx<2.0' pyparsing natsort mock future libgfortran seaborn nltk
'pandas>=0.18' 'matplotlib>=1.1.0' 'scipy>0.13.0' 'numpy>=1.7' 'h5py>=2.3.1'
- source activate qiita
@@ -56,7 +54,9 @@ install:
- cp $PWD/qiita_core/support_files/BIOM\ type_2.1.4.conf ~/.qiita_plugins/BIOM\ type_2.1.4\ -\ Qiime2.conf
- touch ~/.bash_profile
# Install the biom plugin so we can run the analysis tests
- travis_retry conda create -q --yes -n qtp-biom --file https://data.qiime2.org/distro/core/qiime2-2017.11-conda-linux-64.txt
- wget https://data.qiime2.org/distro/core/qiime2-2017.12-py35-linux-conda.yml
- travis_retry conda env create -q -n qtp-biom --file qiime2-2017.12-py35-linux-conda.yml
- rm qiime2-2017.12-py35-linux-conda.yml
- source activate qtp-biom
- pip install https://github.com/qiita-spots/qiita_client/archive/master.zip
- pip install https://github.com/qiita-spots/qtp-biom/archive/master.zip --process-dependency-links
7 changes: 4 additions & 3 deletions qiita_db/__init__.py
@@ -12,6 +12,7 @@
import metadata_template
import analysis
import artifact
import archive
import commands
import environment_manager
import exceptions
@@ -29,8 +30,8 @@

__version__ = "0.2.0-dev"

__all__ = ["analysis", "artifact", "base", "commands", "environment_manager",
"exceptions", "investigation", "logger", "meta_util",
"ontology", "portal", "reference", "search",
__all__ = ["analysis", "artifact", "archive", "base", "commands",
"environment_manager", "exceptions", "investigation", "logger",
"meta_util", "ontology", "portal", "reference", "search",
"software", "sql_connection", "study", "user", "util",
"metadata_template", "processing_job"]
181 changes: 181 additions & 0 deletions qiita_db/archive.py
@@ -0,0 +1,181 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from __future__ import division

import qiita_db as qdb


class Archive(qdb.base.QiitaObject):
r"""Extra information for any features stored in a BIOM Artifact

Methods
-------
insert_from_biom
Contributor: insert_from_artifact?

Member Author: yup

insert_from_artifact

See Also
--------
qiita_db.QiitaObject
"""

@classmethod
def _inserting_main_steps(cls, ms, features):
with qdb.sql_connection.TRN:
sql = """INSERT INTO qiita.archive_merging_scheme
(archive_merging_scheme)
SELECT %s WHERE NOT EXISTS (
SELECT 1 FROM qiita.archive_merging_scheme
WHERE archive_merging_scheme = %s)"""
qdb.sql_connection.TRN.add(sql, [ms, ms])
sql = """SELECT archive_merging_scheme_id
FROM qiita.archive_merging_scheme
WHERE archive_merging_scheme = %s"""
qdb.sql_connection.TRN.add(sql, [ms])
amsi = qdb.sql_connection.TRN.execute_fetchlast()

vals = [[amsi, _id, val] for _id, val in features.items()]
qdb.sql_connection.TRN.add(
"SELECT archive_upsert(%s, %s, %s)", vals, many=True)
qdb.sql_connection.TRN.execute()

@classmethod
def insert_from_artifact(cls, artifact, features):
r"""Inserts new features to the database based on a given artifact

Parameters
----------
artifact : qiita_db.artifact.Artifact
The artifact from which the features were generated
features : dict {str: str}
A dictionary of the features and the values to be stored

Raises
------
ValueError
If the Artifact type is not BIOM
If the artifact doesn't have a biom filepath
"""
with qdb.sql_connection.TRN:
Contributor: seems like most of the logic contained can be done outside of the context?

Member Author: possible, but I think it's better to leave it like this, as some of these steps are actually accessing the DB; this assures that everything is in the same transaction.

Contributor: oh, implicit remote resource operations? :/ okay.

atype = artifact.artifact_type
if atype != 'BIOM':
raise ValueError(
"To archive, the artifact must be BIOM but is %s" % atype)

bfps = [fp for _, fp, fpt in artifact.filepaths if fpt == 'biom']
if not bfps:
raise ValueError("The artifact has no biom files")

# [0] as it returns a list
ms = qdb.util.get_artifacts_information(
[artifact.id])[0]['algorithm']

cls._inserting_main_steps(ms, features)

@classmethod
def get_merging_scheme_from_job(cls, job):
r"""Inserts new features to the database based on a given job

Parameters
----------
job : qiita_db.artifact.Artifact
The artifact from which the features were generated
features : dict {str: str}
A dictionary of the features and the values to be stored

Raises
------
ValueError
If the Artifact type is not BIOM
If the artifact doesn't have a biom filepath
"""
with qdb.sql_connection.TRN:
acmd = job.command
Contributor: same?

Member Author: same ... 😄

Contributor: off topic, but how does a developer know whether a given qiita method (or property) will issue a DB call?

Member Author: I think the safest assumption is that any property/method from a Qiita object will issue a DB call, as everything is stored in there.

ms = acmd.merging_scheme

# 1. cleaning aparams - the parameters of the main artifact/job
temp = acmd.optional_parameters.copy()
temp.update(acmd.required_parameters)
# list: cause it can be tuple or lists
# [0]: the first value is the parameter type
tparams = job.parameters.values
aparams = ','.join(
['%s: %s' % (k, tparams[k]) for k, v in temp.items()
if list(v)[0] != 'artifact' and k in ms['parameters']])
# in theory we could check here for the filepath merging but
# as the files haven't been created we don't have this info.
# Additionally, based on the current functionality, this is not
# important as normally the difference between files is just
# an additional filtering step
if aparams:
cname = "%s (%s)" % (acmd.name, aparams)
else:
cname = acmd.name

# 2. cleaning pparams - the parameters of the parent artifact
# [0] getting the attributes from the first parent
pcmd = job.input_artifacts[0].processing_parameters.command
palgorithm = 'N/A'
if pcmd is not None:
pms = pcmd.merging_scheme
palgorithm = pcmd.name
if pms['parameters']:
pass
# ToDo: Archive
# here we need to check for the parent parameters
# pparams = ','.join(
# ['%s: %s' % (k, tparams[k]) for k, v in temp.items()
# if list(v)[0] != 'artifact' and k in ms['parameters']])
#
# params = ','.join(['%s: %s' % (k, pparams[k])
# for k in ms['parameters']])
# palgorithm = "%s (%s)" % (palgorithm, params)
#
algorithm = '%s | %s' % (cname, palgorithm)

return algorithm

@classmethod
def retrieve_feature_values(cls, archive_merging_scheme=None,
features=None):
r"""Retrieves all features/values from the archive

Parameters
----------
archive_merging_scheme : optional, str
The name of the archive_merging_scheme to retrieve
features : optional, list of str
The features whose values should be retrieved

Notes
-----
If archive_merging_scheme is None it will return all
feature values
"""
with qdb.sql_connection.TRN:
extras = []
vals = []
if archive_merging_scheme is not None:
extras.append("""archive_merging_scheme = %s""")
vals.append(archive_merging_scheme)
if features is not None:
extras.append("""archive_feature IN %s""")
vals.append(tuple(features))

sql = """SELECT archive_feature, archive_feature_value
FROM qiita.archive_feature_value
LEFT JOIN qiita.archive_merging_scheme
USING (archive_merging_scheme_id) {0}
ORDER BY archive_merging_scheme, archive_feature"""

if extras:
sql = sql.format('WHERE ' + ' AND '.join(extras))
qdb.sql_connection.TRN.add(sql, vals)
else:
qdb.sql_connection.TRN.add(sql.format(''))

return {k: v for k, v in
qdb.sql_connection.TRN.execute_fetchindex()}
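
For orientation, a minimal usage sketch of the new class (the artifact id, feature ids, values, and merging scheme name below are made-up examples; a configured Qiita environment is assumed):

    import qiita_db as qdb

    # the artifact must be of type BIOM; id 4 is a hypothetical example
    artifact = qdb.artifact.Artifact(4)

    # store one value per feature, e.g. a taxonomy string per OTU id
    qdb.archive.Archive.insert_from_artifact(
        artifact, {'feature-1': 'k__Bacteria', 'feature-2': 'k__Archaea'})

    # retrieve the values back; the merging scheme name is illustrative
    obs = qdb.archive.Archive.retrieve_feature_values(
        archive_merging_scheme='Pick closed-reference OTUs | N/A',
        features=['feature-1', 'feature-2'])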
9 changes: 6 additions & 3 deletions qiita_db/handlers/archive.py
@@ -7,6 +7,8 @@
# -----------------------------------------------------------------------------

from .oauth2 import OauthBaseHandler, authenticate_oauth
from qiita_db.processing_job import ProcessingJob
from qiita_db.archive import Archive


class APIArchiveObservations(OauthBaseHandler):
@@ -19,10 +21,11 @@ def post(self):
dict
The archived feature values keyed by feature id
"""
# job_id = self.get_argument('job_id')
job_id = self.get_argument('job_id')
features = self.request.arguments['features']

# TODO: search on artifact
response = {v: [] for v in features}
ms = Archive.get_merging_scheme_from_job(ProcessingJob(job_id))
response = Archive.retrieve_feature_values(
archive_merging_scheme=ms, features=features)

self.write(response)
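
As a rough sketch, the updated endpoint can be exercised with an authenticated POST (the host, token, and ids are hypothetical; a real call needs a token from the usual OAuth2 flow):

    import requests

    # all values below are made up for illustration; the list of
    # features is sent as repeated form fields, which Tornado exposes
    # through self.request.arguments['features']
    resp = requests.post(
        'https://qiita.example.org/qiita_db/archive/observations/',
        headers={'Authorization': 'Bearer <oauth2-token>'},
        data={'job_id': '<job-id>',
              'features': ['feature-1', 'feature-2']})
    print(resp.json())  # e.g. {'feature-1': 'k__Bacteria', ...}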
24 changes: 20 additions & 4 deletions qiita_db/handlers/tests/test_archive.py
@@ -14,6 +14,7 @@
from json import loads

from qiita_db.handlers.tests.oauthbase import OauthTestingBase
from qiita_db.sql_connection import TRN


class APIArchiveObservationsTests(OauthTestingBase):
@@ -32,10 +33,25 @@ def tearDown(self):
rmtree(fp)

def test_post(self):
obs = self.post('/qiita_db/archive/observations/', headers=self.header,
data={'job_id': 'a_job_id', 'features': ['AA', 'CA']})
self.assertEqual(obs.code, 200)
self.assertEqual(loads(obs.body), {'AA': [], 'CA': []})
# let's archive different values from different jobs
with TRN:
# 3 - close reference picking
# 3 - success
sql = """SELECT processing_job_id
FROM qiita.processing_job
WHERE command_id = 3 AND processing_job_status_id = 3"""
TRN.add(sql)
jobs = TRN.execute_fetchflatten()

for j in jobs:
special_feature = 'AA - %s' % j
data = {'job_id': j, 'features': [special_feature, 'CA']}
obs = self.post(
'/qiita_db/archive/observations/', headers=self.header,
data=data)
exp = {}
self.assertEqual(obs.code, 200)
self.assertEqual(loads(obs.body), exp)


if __name__ == '__main__':
34 changes: 34 additions & 0 deletions qiita_db/handlers/tests/test_processing_job.py
@@ -226,6 +226,40 @@ def test_post_job_success(self):
self.assertEqual(qdb.util.get_count('qiita.artifact'),
exp_artifact_count)

def test_post_job_success_with_archive(self):
pt = npt.assert_warns(
qdb.exceptions.QiitaDBWarning,
qdb.metadata_template.prep_template.PrepTemplate.create,
pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}),
qdb.study.Study(1), '16S')
job = qdb.processing_job.ProcessingJob.create(
qdb.user.User('test@foo.bar'),
qdb.software.Parameters.load(
qdb.software.Command.get_validator('BIOM'),
values_dict={'template': pt.id, 'files':
dumps({'BIOM': ['file']}),
'artifact_type': 'BIOM'}))
job._set_status('running')

fd, fp = mkstemp(suffix='_table.biom')
close(fd)
with open(fp, 'w') as f:
f.write('\n')

self._clean_up_files.append(fp)

payload = dumps(
{'success': True, 'error': '',
'artifacts': {'OTU table': {'filepaths': [(fp, 'biom')],
'artifact_type': 'BIOM'}},
'archive': {'AAAA': 'AAA', 'CCC': 'CCC'}})

obs = self.post(
'/qiita_db/jobs/%s/complete/' % job.id,
payload, headers=self.header)
wait_for_processing_job(job.id)
self.assertEqual(obs.code, 200)


class ProcessingJobAPItestHandlerTests(OauthTestingBase):
def test_post_processing_job(self):
42 changes: 42 additions & 0 deletions qiita_db/support_files/patches/63.sql
@@ -0,0 +1,42 @@
-- December 27th, 2017
-- Creating archive feature tables

CREATE TABLE qiita.archive_merging_scheme (
archive_merging_scheme_id bigserial NOT NULL,
archive_merging_scheme varchar NOT NULL,
CONSTRAINT pk_merging_scheme PRIMARY KEY ( archive_merging_scheme_id )
) ;

CREATE TABLE qiita.archive_feature_value (
archive_merging_scheme_id bigint NOT NULL,
archive_feature varchar NOT NULL,
archive_feature_value varchar NOT NULL,
CONSTRAINT idx_archive_feature_value PRIMARY KEY ( archive_merging_scheme_id, archive_feature )
) ;

CREATE INDEX idx_archive_feature_value_0 ON qiita.archive_feature_value ( archive_merging_scheme_id ) ;

ALTER TABLE qiita.archive_feature_value ADD CONSTRAINT fk_archive_feature_value FOREIGN KEY ( archive_merging_scheme_id ) REFERENCES qiita.archive_merging_scheme( archive_merging_scheme_id );

-- taken from https://goo.gl/YtSvz2
CREATE OR REPLACE FUNCTION archive_upsert(amsi INT, af VARCHAR, afv VARCHAR) RETURNS VOID AS $$
BEGIN
LOOP
-- first try to update the key
UPDATE qiita.archive_feature_value SET archive_feature_value = afv WHERE archive_merging_scheme_id = amsi AND archive_feature = af;
IF found THEN
RETURN;
END IF;
-- not there, so try to insert the key
-- if someone else inserts the same key concurrently,
-- we could get a unique-key failure
BEGIN
INSERT INTO qiita.archive_feature_value (archive_merging_scheme_id, archive_feature, archive_feature_value) VALUES (amsi, af, afv);
RETURN;
EXCEPTION WHEN unique_violation THEN
-- Do nothing, and loop to try the UPDATE again.
END;
END LOOP;
END;
$$
LANGUAGE plpgsql;
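
For illustration, a sketch of how this function ends up being invoked from the Python layer (compare Archive._inserting_main_steps above; the merging scheme id and values are made up):

    from qiita_db.sql_connection import TRN

    with TRN:
        # the first call inserts the key; repeating it with a new value
        # takes the UPDATE branch and overwrites the stored value in place
        TRN.add("SELECT archive_upsert(%s, %s, %s)",
                [1, 'feature-1', 'k__Bacteria'])
        TRN.add("SELECT archive_upsert(%s, %s, %s)",
                [1, 'feature-1', 'k__Archaea'])
        TRN.execute()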