12 changes: 6 additions & 6 deletions .travis.yml
@@ -3,20 +3,18 @@ language: python
sudo: false
env:
global:
- PYTHON_VERSION=2.7
- PYTHON_VERSION=3.5
Contributor: ?

Member Author: Long story short: there is a bug in conda that the QIIME 2 team reported, and this is the best way to install everything. Note that these changes are already in master and passing.

matrix:
- TEST_ADD_STUDIES=False COVER_PACKAGE=qiita_db
- TEST_ADD_STUDIES=False COVER_PACKAGE=qiita_pet
- TEST_ADD_STUDIES=True COVER_PACKAGE="qiita_core qiita_ware"
before_install:
- redis-server --version
- redis-server --port 7777 &
- wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- wget http://repo.continuum.io/miniconda/Miniconda3-4.3.31-Linux-x86_64.sh -O miniconda.sh
- chmod +x miniconda.sh
- ./miniconda.sh -b
- export PATH=/home/travis/miniconda3/bin:$PATH
# Update conda itself
- conda update --yes conda
# Downloading and setting up ascp for EBI testing
- wget ftp://ftp.microbio.me/pub/qiita/ascp-install-3.5.4.102989-linux-64-qiita.sh -O ascp-install-3.5.4.102989-linux-64-qiita.sh
- chmod +x ascp-install-3.5.4.102989-linux-64-qiita.sh
@@ -27,7 +25,7 @@ before_install:
install:
# install a few of the dependencies that pip would otherwise try to install
# when installing scikit-bio
- travis_retry conda create -q --yes -n qiita python=$PYTHON_VERSION pip nose flake8
- travis_retry conda create -q --yes -n qiita python=2.7 pip nose flake8
pyzmq 'networkx<2.0' pyparsing natsort mock future libgfortran seaborn nltk
'pandas>=0.18' 'matplotlib>=1.1.0' 'scipy>0.13.0' 'numpy>=1.7' 'h5py>=2.3.1'
- source activate qiita
@@ -56,7 +54,9 @@ install:
- cp $PWD/qiita_core/support_files/BIOM\ type_2.1.4.conf ~/.qiita_plugins/BIOM\ type_2.1.4\ -\ Qiime2.conf
- touch ~/.bash_profile
# Install the biom plugin so we can run the analysis tests
- travis_retry conda create -q --yes -n qtp-biom --file https://data.qiime2.org/distro/core/qiime2-2017.11-conda-linux-64.txt
- wget https://data.qiime2.org/distro/core/qiime2-2017.12-py35-linux-conda.yml
- travis_retry conda env create -q -n qtp-biom --file qiime2-2017.12-py35-linux-conda.yml
- rm qiime2-2017.12-py35-linux-conda.yml
- source activate qtp-biom
- pip install https://github.com/qiita-spots/qiita_client/archive/master.zip
- pip install https://github.com/qiita-spots/qtp-biom/archive/master.zip --process-dependency-links
7 changes: 4 additions & 3 deletions qiita_db/__init__.py
@@ -12,6 +12,7 @@
import metadata_template
import analysis
import artifact
import archive
import commands
import environment_manager
import exceptions
@@ -29,8 +30,8 @@

__version__ = "0.2.0-dev"

__all__ = ["analysis", "artifact", "base", "commands", "environment_manager",
"exceptions", "investigation", "logger", "meta_util",
"ontology", "portal", "reference", "search",
__all__ = ["analysis", "artifact", "archive", "base", "commands",
"environment_manager", "exceptions", "investigation", "logger",
"meta_util", "ontology", "portal", "reference", "search",
"software", "sql_connection", "study", "user", "util",
"metadata_template", "processing_job"]
181 changes: 181 additions & 0 deletions qiita_db/archive.py
@@ -0,0 +1,181 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from __future__ import division

import qiita_db as qdb


class Archive(qdb.base.QiitaObject):
r"""Extra information for any features stored in a BIOM Artifact

Methods
-------
insert_from_biom
Contributor: insert_from_artifact?

Member Author: yup

insert_from_artifact

See Also
--------
qiita_db.QiitaObject
"""

@classmethod
def _inserting_main_steps(cls, ms, features):
with qdb.sql_connection.TRN:
sql = """INSERT INTO qiita.archive_merging_scheme
(archive_merging_scheme)
SELECT %s WHERE NOT EXISTS (
SELECT 1 FROM qiita.archive_merging_scheme
WHERE archive_merging_scheme = %s)"""
qdb.sql_connection.TRN.add(sql, [ms, ms])
sql = """SELECT archive_merging_scheme_id
FROM qiita.archive_merging_scheme
WHERE archive_merging_scheme = %s"""
qdb.sql_connection.TRN.add(sql, [ms])
amsi = qdb.sql_connection.TRN.execute_fetchlast()

vals = [[amsi, _id, val] for _id, val in features.items()]
qdb.sql_connection.TRN.add(
"SELECT archive_upsert(%s, %s, %s)", vals, many=True)
qdb.sql_connection.TRN.execute()

@classmethod
def insert_from_artifact(cls, artifact, features):
r"""Inserts new features to the database based on a given artifact

Parameters
----------
artifact : qiita_db.artifact.Artifact
The artifact from which the features were generated
features : dict {str: str}
A dictionary of the features and the values to be stored

Raises
------
ValueError
If the Artifact type is not BIOM
If the artifact doesn't have a biom filepath
"""
with qdb.sql_connection.TRN:
Contributor: seems like most of the logic contained can be done outside of the context?

Member Author: possible, but I think it's better to leave it like this, as some of these steps are actually accessing the DB; this assures that everything is in the same transaction.

Contributor: oh, implicit remote resource operations? :/ okay.

atype = artifact.artifact_type
if atype != 'BIOM':
raise ValueError(
"To archive, the artifact must be BIOM but is %s" % atype)

bfps = [fp for _, fp, fpt in artifact.filepaths if fpt == 'biom']
if not bfps:
raise ValueError("The artifact has no biom files")

# [0] as it returns a list
ms = qdb.util.get_artifacts_information(
[artifact.id])[0]['algorithm']

cls._inserting_main_steps(ms, features)

@classmethod
def get_merging_scheme_from_job(cls, job):
r"""Inserts new features to the database based on a given job

Parameters
----------
job : qiita_db.artifact.Artifact
The artifact from which the features were generated
features : dict {str: str}
A dictionary of the features and the values to be stored

Raises
------
ValueError
If the Artifact type is not BIOM
If the artifact doesn't have a biom filepath
"""
with qdb.sql_connection.TRN:
acmd = job.command
Contributor: same?

Member Author: same ... 😄

Contributor: off topic, but how does a developer know whether a given qiita method (or property) will issue a DB call?

Member Author: I think the safest assumption is that any property/method from a Qiita object will issue a DB call, as everything is stored in there.

ms = acmd.merging_scheme

# 1. cleaning aparams - the parameters of the main artifact/job
temp = acmd.optional_parameters.copy()
temp.update(acmd.required_parameters)
# list: cause it can be tuple or lists
# [0]: the first value is the parameter type
tparams = job.parameters.values
aparams = ','.join(
['%s: %s' % (k, tparams[k]) for k, v in temp.items()
if list(v)[0] != 'artifact' and k in ms['parameters']])
# in theory we could check here for the filepath merging but
# as the files haven't been created we don't have this info.
# Additionally, based on the current functionality, this is not
# important as normally the difference between files is just
# an additional filtering step
if aparams:
cname = "%s (%s)" % (acmd.name, aparams)
else:
cname = acmd.name

# 2. cleaning pparams - the parameters of the parent artifact
# [0] getting the attributes from the first parent
pcmd = job.input_artifacts[0].processing_parameters.command
palgorithm = 'N/A'
if pcmd is not None:
pms = pcmd.merging_scheme
palgorithm = pcmd.name
if pms['parameters']:
pass
# ToDo: Archive
# here we need to check for the parent parameters
# pparams = ','.join(
# ['%s: %s' % (k, tparams[k]) for k, v in temp.items()
# if list(v)[0] != 'artifact' and k in ms['parameters']])
#
# params = ','.join(['%s: %s' % (k, pparams[k])
# for k in ms['parameters']])
# palgorithm = "%s (%s)" % (palgorithm, params)
#
algorithm = '%s | %s' % (cname, palgorithm)

return algorithm

@classmethod
def retrieve_feature_values(cls, archive_merging_scheme=None,
features=None):
r"""Retrieves all features/values from the archive

Parameters
----------
archive_merging_scheme : optional, str
The name of the archive_merging_scheme to retrieve
features : optional, list of str
The features whose values should be retrieved

Notes
-----
If archive_merging_scheme is None it will return all
feature values
"""
with qdb.sql_connection.TRN:
extras = []
vals = []
if archive_merging_scheme is not None:
extras.append("""archive_merging_scheme = %s""")
vals.append(archive_merging_scheme)
if features is not None:
extras.append("""archive_feature IN %s""")
vals.append(tuple(features))

sql = """SELECT archive_feature, archive_feature_value
FROM qiita.archive_feature_value
LEFT JOIN qiita.archive_merging_scheme
USING (archive_merging_scheme_id) {0}
ORDER BY archive_merging_scheme, archive_feature"""

if extras:
sql = sql.format('WHERE ' + ' AND '.join(extras))
qdb.sql_connection.TRN.add(sql, vals)
else:
qdb.sql_connection.TRN.add(sql.format(''))

return {k: v for k, v in
qdb.sql_connection.TRN.execute_fetchindex()}
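
For orientation, a minimal usage sketch of the new class (the artifact id, feature ids, values, and merging scheme name below are made-up examples; a configured Qiita environment is assumed):

    import qiita_db as qdb

    # the artifact must be of type BIOM; id 4 is a hypothetical example
    artifact = qdb.artifact.Artifact(4)

    # store one value per feature, e.g. a taxonomy string per OTU id
    qdb.archive.Archive.insert_from_artifact(
        artifact, {'feature-1': 'k__Bacteria', 'feature-2': 'k__Archaea'})

    # retrieve the values back; the merging scheme name is illustrative
    obs = qdb.archive.Archive.retrieve_feature_values(
        archive_merging_scheme='Pick closed-reference OTUs | N/A',
        features=['feature-1', 'feature-2'])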
9 changes: 6 additions & 3 deletions qiita_db/handlers/archive.py
@@ -7,6 +7,8 @@
# -----------------------------------------------------------------------------

from .oauth2 import OauthBaseHandler, authenticate_oauth
from qiita_db.processing_job import ProcessingJob
from qiita_db.archive import Archive


class APIArchiveObservations(OauthBaseHandler):
@@ -19,10 +21,11 @@ def post(self):
dict
The archived feature values keyed by feature id
"""
# job_id = self.get_argument('job_id')
job_id = self.get_argument('job_id')
features = self.request.arguments['features']

# TODO: search on artifact
response = {v: [] for v in features}
ms = Archive.get_merging_scheme_from_job(ProcessingJob(job_id))
response = Archive.retrieve_feature_values(
archive_merging_scheme=ms, features=features)

self.write(response)
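
As a rough sketch, the updated endpoint can be exercised with an authenticated POST (the host, token, and ids are hypothetical; a real call needs a token from the usual OAuth2 flow):

    import requests

    # all values below are made up for illustration; the list of
    # features is sent as repeated form fields, which Tornado exposes
    # through self.request.arguments['features']
    resp = requests.post(
        'https://qiita.example.org/qiita_db/archive/observations/',
        headers={'Authorization': 'Bearer <oauth2-token>'},
        data={'job_id': '<job-id>',
              'features': ['feature-1', 'feature-2']})
    print(resp.json())  # e.g. {'feature-1': 'k__Bacteria', ...}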
24 changes: 20 additions & 4 deletions qiita_db/handlers/tests/test_archive.py
@@ -14,6 +14,7 @@
from json import loads

from qiita_db.handlers.tests.oauthbase import OauthTestingBase
from qiita_db.sql_connection import TRN


class APIArchiveObservationsTests(OauthTestingBase):
@@ -32,10 +33,25 @@ def tearDown(self):
rmtree(fp)

def test_post(self):
obs = self.post('/qiita_db/archive/observations/', headers=self.header,
data={'job_id': 'a_job_id', 'features': ['AA', 'CA']})
self.assertEqual(obs.code, 200)
self.assertEqual(loads(obs.body), {'AA': [], 'CA': []})
# let's archive different values from different jobs
with TRN:
# 3 - close reference picking
# 3 - success
sql = """SELECT processing_job_id
FROM qiita.processing_job
WHERE command_id = 3 AND processing_job_status_id = 3"""
TRN.add(sql)
jobs = TRN.execute_fetchflatten()

for j in jobs:
special_feature = 'AA - %s' % j
data = {'job_id': j, 'features': [special_feature, 'CA']}
obs = self.post(
'/qiita_db/archive/observations/', headers=self.header,
data=data)
exp = {}
self.assertEqual(obs.code, 200)
self.assertEqual(loads(obs.body), exp)


if __name__ == '__main__':
34 changes: 34 additions & 0 deletions qiita_db/handlers/tests/test_processing_job.py
@@ -226,6 +226,40 @@ def test_post_job_success(self):
self.assertEqual(qdb.util.get_count('qiita.artifact'),
exp_artifact_count)

def test_post_job_success_with_archive(self):
pt = npt.assert_warns(
qdb.exceptions.QiitaDBWarning,
qdb.metadata_template.prep_template.PrepTemplate.create,
pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}),
qdb.study.Study(1), '16S')
job = qdb.processing_job.ProcessingJob.create(
qdb.user.User('test@foo.bar'),
qdb.software.Parameters.load(
qdb.software.Command.get_validator('BIOM'),
values_dict={'template': pt.id, 'files':
dumps({'BIOM': ['file']}),
'artifact_type': 'BIOM'}))
job._set_status('running')

fd, fp = mkstemp(suffix='_table.biom')
close(fd)
with open(fp, 'w') as f:
f.write('\n')

self._clean_up_files.append(fp)

payload = dumps(
{'success': True, 'error': '',
'artifacts': {'OTU table': {'filepaths': [(fp, 'biom')],
'artifact_type': 'BIOM'}},
'archive': {'AAAA': 'AAA', 'CCC': 'CCC'}})

obs = self.post(
'/qiita_db/jobs/%s/complete/' % job.id,
payload, headers=self.header)
wait_for_processing_job(job.id)
self.assertEqual(obs.code, 200)


class ProcessingJobAPItestHandlerTests(OauthTestingBase):
def test_post_processing_job(self):
42 changes: 42 additions & 0 deletions qiita_db/support_files/patches/63.sql
@@ -0,0 +1,42 @@
-- December 27th, 2017
-- Creating archive feature tables

CREATE TABLE qiita.archive_merging_scheme (
archive_merging_scheme_id bigserial NOT NULL,
archive_merging_scheme varchar NOT NULL,
CONSTRAINT pk_merging_scheme PRIMARY KEY ( archive_merging_scheme_id )
) ;

CREATE TABLE qiita.archive_feature_value (
archive_merging_scheme_id bigint NOT NULL,
archive_feature varchar NOT NULL,
archive_feature_value varchar NOT NULL,
CONSTRAINT idx_archive_feature_value PRIMARY KEY ( archive_merging_scheme_id, archive_feature )
) ;

CREATE INDEX idx_archive_feature_value_0 ON qiita.archive_feature_value ( archive_merging_scheme_id ) ;

ALTER TABLE qiita.archive_feature_value ADD CONSTRAINT fk_archive_feature_value FOREIGN KEY ( archive_merging_scheme_id ) REFERENCES qiita.archive_merging_scheme( archive_merging_scheme_id );

-- taken from https://goo.gl/YtSvz2
CREATE OR REPLACE FUNCTION archive_upsert(amsi INT, af VARCHAR, afv VARCHAR) RETURNS VOID AS $$
BEGIN
LOOP
-- first try to update the key
UPDATE qiita.archive_feature_value SET archive_feature_value = afv WHERE archive_merging_scheme_id = amsi AND archive_feature = af;
IF found THEN
RETURN;
END IF;
-- not there, so try to insert the key
-- if someone else inserts the same key concurrently,
-- we could get a unique-key failure
BEGIN
INSERT INTO qiita.archive_feature_value (archive_merging_scheme_id, archive_feature, archive_feature_value) VALUES (amsi, af, afv);
RETURN;
EXCEPTION WHEN unique_violation THEN
-- Do nothing, and loop to try the UPDATE again.
END;
END LOOP;
END;
$$
LANGUAGE plpgsql;
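
For illustration, a sketch of how this function ends up being invoked from the Python layer (compare Archive._inserting_main_steps above; the merging scheme id and values are made up):

    from qiita_db.sql_connection import TRN

    with TRN:
        # the first call inserts the key; repeating it with a new value
        # takes the UPDATE branch and overwrites the stored value in place
        TRN.add("SELECT archive_upsert(%s, %s, %s)",
                [1, 'feature-1', 'k__Bacteria'])
        TRN.add("SELECT archive_upsert(%s, %s, %s)",
                [1, 'feature-1', 'k__Archaea'])
        TRN.execute()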