Skip to content

Commit 90b3de2

Browse files
committed
merge upstream/master
2 parents 68488c7 + 9350269 commit 90b3de2

File tree

21 files changed

+2673
-2484
lines changed

21 files changed

+2673
-2484
lines changed

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ before_install:
1717
install:
1818
# install a few of the dependencies that pip would otherwise try to install
1919
# when installing scikit-bio
20-
- conda create --yes -n env_name python=$PYTHON_VERSION pip nose flake8 pyzmq networkx pyparsing natsort mock 'pandas>=0.15' 'matplotlib>=1.1.0' 'scipy>0.13.0' 'numpy>= 1.7' 'h5py>=2.3.1' 'future==0.13.0'
20+
- travis_retry conda create --yes -n env_name python=$PYTHON_VERSION pip nose flake8 pyzmq networkx pyparsing natsort mock 'pandas>=0.15' 'matplotlib>=1.1.0' 'scipy>0.13.0' 'numpy>= 1.7' 'h5py>=2.3.1' 'future==0.13.0'
2121
- source activate env_name
2222
- pip install coveralls ipython[all]==2.4.1
2323
- pip install https://github.com/biocore/mustached-octo-ironman/archive/master.zip
24-
- pip install .
24+
- travis_retry pip install .
2525
script:
2626
- export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test.txt
2727
- ipython profile create qiita_general --parallel

qiita_db/data.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,27 @@ def processing_info(self):
13521352

13531353
return dynamic_info
13541354

1355+
@property
1356+
def samples(self):
1357+
"""Return the samples available according to prep template
1358+
1359+
Returns
1360+
-------
1361+
set
1362+
all sample_ids available for the processed data
1363+
"""
1364+
conn_handler = SQLConnectionHandler()
1365+
# Get the prep template id for the dynamic table lookup
1366+
sql = """SELECT ptp.prep_template_id FROM
1367+
qiita.prep_template_preprocessed_data ptp JOIN
1368+
qiita.preprocessed_processed_data ppd USING (preprocessed_data_id)
1369+
WHERE ppd.processed_data_id = %s"""
1370+
prep_id = conn_handler.execute_fetchone(sql, [self._id])[0]
1371+
1372+
# Get samples from dynamic table
1373+
sql = "SELECT sample_id FROM qiita.prep_%d" % prep_id
1374+
return set(s[0] for s in conn_handler.execute_fetchall(sql))
1375+
13551376
@property
13561377
def status(self):
13571378
conn_handler = SQLConnectionHandler()

qiita_db/search.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,15 @@
6565
from pyparsing import (alphas, nums, Word, dblQuotedString, oneOf, Optional,
6666
opAssoc, CaselessLiteral, removeQuotes, Group,
6767
operatorPrecedence, stringEnd)
68+
from collections import defaultdict
69+
70+
import pandas as pd
71+
from future.utils import viewitems
6872

6973
from qiita_db.util import scrub_data, convert_type, get_table_cols
7074
from qiita_db.sql_connection import SQLConnectionHandler
7175
from qiita_db.study import Study
76+
from qiita_db.data import ProcessedData
7277
from qiita_db.exceptions import QiitaDBIncompatibleDatatypeError
7378

7479

@@ -211,6 +216,8 @@ def __call__(self, searchstr, user):
211216
if study_res:
212217
# only add study to results if actually has samples in results
213218
results[sid] = study_res
219+
self.results = results
220+
self.meta_headers = meta_headers
214221
return results, meta_headers
215222

216223
def _parse_study_search_string(self, searchstr,
@@ -304,8 +311,8 @@ def _parse_study_search_string(self, searchstr,
304311

305312
# create the study finding SQL
306313
# remove metadata headers that are in required_sample_info table
307-
meta_headers = meta_headers.difference(self.required_cols).difference(
308-
self.study_cols)
314+
meta_headers = tuple(meta_headers.difference(
315+
self.required_cols).difference(self.study_cols))
309316

310317
# get all study ids that contain all metadata categories searched for
311318
sql = []
@@ -347,3 +354,54 @@ def _parse_study_search_string(self, searchstr,
347354
"r.study_id WHERE %s" %
348355
(','.join(header_info), sql_where))
349356
return study_sql, sample_sql, meta_header_type_lookup.keys()
357+
358+
def filter_by_processed_data(self, datatypes=None):
359+
"""Filters results to what is available in each processed data
360+
361+
Parameters
362+
----------
363+
datatypes : list of str, optional
364+
Datatypes to selectively return. Default all datatypes available
365+
366+
Returns
367+
-------
368+
study_proc_ids : dict of dicts of lists
369+
Processed data ids with samples for each study, in the format
370+
{study_id: {datatype: [proc_id, proc_id, ...], ...}, ...}
371+
proc_data_samples : dict of lists
372+
Samples available in each processed data id, in the format
373+
{proc_data_id: [samp_id1, samp_id2, ...], ...}
374+
samples_meta : dict of pandas DataFrames
375+
metadata for the found samples, keyed by study. Pandas indexed on
376+
sample_id, column headers are the metadata categories searched
377+
over
378+
"""
379+
if datatypes is not None:
380+
# convert to set for easy lookups
381+
datatypes = set(datatypes)
382+
study_proc_ids = {}
383+
proc_data_samples = {}
384+
samples_meta = {}
385+
headers = {c: val for c, val in enumerate(self.meta_headers)}
386+
for study_id, study_meta in viewitems(self.results):
387+
# add metadata to dataframe and dict
388+
# use from_dict because pandas doesn't like cursor objects
389+
samples_meta[study_id] = pd.DataFrame.from_dict(
390+
{s[0]: s[1:] for s in study_meta}, orient='index')
391+
samples_meta[study_id].rename(columns=headers, inplace=True)
392+
# set up study-based data needed
393+
study = Study(study_id)
394+
study_sample_ids = {s[0] for s in study_meta}
395+
study_proc_ids[study_id] = defaultdict(list)
396+
for proc_data_id in study.processed_data():
397+
proc_data = ProcessedData(proc_data_id)
398+
datatype = proc_data.data_type()
399+
# skip processed data if it doesn't fit the given datatypes
400+
if datatypes is not None and datatype not in datatypes:
401+
continue
402+
filter_samps = proc_data.samples.intersection(study_sample_ids)
403+
if filter_samps:
404+
proc_data_samples[proc_data_id] = sorted(filter_samps)
405+
study_proc_ids[study_id][datatype].append(proc_data_id)
406+
407+
return study_proc_ids, proc_data_samples, samples_meta

qiita_db/study.py

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,14 @@
9898
from __future__ import division
9999
from future.utils import viewitems
100100
from copy import deepcopy
101+
from itertools import chain
102+
import warnings
101103

102104
from qiita_core.exceptions import IncompetentQiitaDeveloperError
103105
from .base import QiitaObject
104106
from .exceptions import (QiitaDBStatusError, QiitaDBColumnError, QiitaDBError)
105107
from .util import (check_required_columns, check_table_cols, convert_to_id,
106-
get_environmental_packages, infer_status)
108+
get_environmental_packages, get_table_cols, infer_status)
107109
from .sql_connection import SQLConnectionHandler
108110

109111

@@ -142,7 +144,12 @@ class Study(QiitaObject):
142144
"""
143145
_table = "study"
144146
# The following columns are considered not part of the study info
145-
_non_info = {"email", "study_title"}
147+
_non_info = frozenset(["email", "study_title"])
148+
# The following tables are considered part of info
149+
_info_cols = frozenset(chain(
150+
get_table_cols('study'), get_table_cols('study_status'),
151+
get_table_cols('timeseries_type'), get_table_cols('portal_type'),
152+
get_table_cols('study_pmid')))
146153

147154
def _lock_non_sandbox(self, conn_handler):
148155
"""Raises QiitaDBStatusError if study is non-sandboxed"""
@@ -198,6 +205,45 @@ def get_by_status(cls, status):
198205

199206
return studies
200207

208+
@classmethod
209+
def get_info(cls, study_ids=None, info_cols=None):
210+
"""Returns study data for a set of study_ids
211+
212+
Parameters
213+
----------
214+
study_ids : list of ints, optional
215+
Studies to get information for. Defauls to all studies
216+
info_cols: list of str, optional
217+
Information columns to retrieve. Defaults to all study data
218+
219+
Returns
220+
-------
221+
list of DictCursor
222+
Table-like structure of metadata, one study per row. Can be
223+
accessed as a list of dictionaries, keyed on column name.
224+
"""
225+
if info_cols is None:
226+
info_cols = cls._info_cols
227+
elif not cls._info_cols.issuperset(info_cols):
228+
warnings.warn("Non-info columns passed: %s" % ", ".join(
229+
set(info_cols) - cls._info_cols))
230+
231+
search_cols = ",".join(sorted(cls._info_cols.intersection(info_cols)))
232+
233+
sql = """SELECT {0} FROM (
234+
qiita.study
235+
JOIN qiita.timeseries_type USING (timeseries_type_id)
236+
JOIN qiita.portal_type USING (portal_type_id)
237+
LEFT JOIN (SELECT study_id, array_agg(pmid ORDER BY pmid) as
238+
pmid FROM qiita.study_pmid GROUP BY study_id) sp USING (study_id)
239+
)""".format(search_cols)
240+
if study_ids is not None:
241+
sql = "{0} WHERE study_id in ({1})".format(
242+
sql, ','.join(str(s) for s in study_ids))
243+
244+
conn_handler = SQLConnectionHandler()
245+
return conn_handler.execute_fetchall(sql)
246+
201247
@classmethod
202248
def exists(cls, study_title):
203249
"""Check if a study exists based on study_title, which is unique

qiita_db/support_files/patches/20.sql

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
1-
-- March 28, 2015
2-
-- Add default analyses for all existing users
3-
DO $do$
4-
DECLARE
5-
eml varchar;
6-
aid bigint;
7-
BEGIN
8-
FOR eml IN
9-
SELECT email FROM qiita.qiita_user
10-
LOOP
11-
INSERT INTO qiita.analysis (email, name, description, dflt, analysis_status_id) VALUES (eml, eml || '-dflt', 'dflt', true, 1) RETURNING analysis_id INTO aid;
12-
INSERT INTO qiita.analysis_workflow (analysis_id, step) VALUES (aid, 2);
13-
END LOOP;
14-
END $do$;
1+
-- March 19, 2015
2+
-- Rename columns to be more descriptive and allow easier joins
3+
ALTER TABLE qiita.processed_data_status RENAME COLUMN description TO processed_data_status_description;
4+
ALTER TABLE qiita.portal_type RENAME COLUMN description TO portal_description;
5+
ALTER TABLE qiita.investigation RENAME COLUMN description TO investigation_description;
6+
ALTER TABLE qiita.investigation RENAME COLUMN name TO investigation_name;

qiita_db/support_files/patches/21.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
-- March 28, 2015
2+
-- Add default analyses for all existing users
3+
DO $do$
4+
DECLARE
5+
eml varchar;
6+
aid bigint;
7+
BEGIN
8+
FOR eml IN
9+
SELECT email FROM qiita.qiita_user
10+
LOOP
11+
INSERT INTO qiita.analysis (email, name, description, dflt, analysis_status_id) VALUES (eml, eml || '-dflt', 'dflt', true, 1) RETURNING analysis_id INTO aid;
12+
INSERT INTO qiita.analysis_workflow (analysis_id, step) VALUES (aid, 2);
13+
END LOOP;
14+
END $do$;

qiita_db/support_files/populate_test_db.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ INSERT INTO qiita.study_users (study_id, email) VALUES (1, 'shared@foo.bar');
4949
INSERT INTO qiita.study_pmid (study_id, pmid) VALUES (1, '123456'), (1, '7891011');
5050

5151
-- Insert an investigation
52-
INSERT INTO qiita.investigation (name, description, contact_person_id) VALUES
52+
INSERT INTO qiita.investigation (investigation_name, investigation_description, contact_person_id) VALUES
5353
('TestInvestigation', 'An investigation for testing purposes', 3);
5454

5555
-- Insert investigation_study (link study 1 with investigation 1)

qiita_db/support_files/qiita-db.dbs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -518,8 +518,8 @@
518518
<table name="investigation" >
519519
<comment>Overarching investigation information.An investigation comprises one or more individual studies.</comment>
520520
<column name="investigation_id" type="bigserial" jt="-5" mandatory="y" />
521-
<column name="name" type="varchar" jt="12" mandatory="y" />
522-
<column name="description" type="varchar" jt="12" mandatory="y" >
521+
<column name="investigation_name" type="varchar" jt="12" mandatory="y" />
522+
<column name="investigation_description" type="varchar" jt="12" mandatory="y" >
523523
<comment><![CDATA[Describes the overarching goal of the investigation]]></comment>
524524
</column>
525525
<column name="contact_person_id" type="bigint" jt="-5" />
@@ -682,7 +682,7 @@
682682
<comment>What portals are available to show a study in</comment>
683683
<column name="portal_type_id" type="bigserial" jt="-5" mandatory="y" />
684684
<column name="portal" type="varchar" jt="12" mandatory="y" />
685-
<column name="description" type="varchar" jt="12" mandatory="y" />
685+
<column name="portal_description" type="varchar" jt="12" mandatory="y" />
686686
<index name="pk_portal_type" unique="PRIMARY_KEY" >
687687
<column name="portal_type_id" />
688688
</index>
@@ -1000,7 +1000,7 @@
10001000
<table name="processed_data_status" >
10011001
<column name="processed_data_status_id" type="bigserial" jt="-5" mandatory="y" />
10021002
<column name="processed_data_status" type="varchar" jt="12" mandatory="y" />
1003-
<column name="description" type="varchar" jt="12" mandatory="y" />
1003+
<column name="processed_data_status_description" type="varchar" jt="12" mandatory="y" />
10041004
<index name="pk_study_status" unique="PRIMARY_KEY" >
10051005
<column name="processed_data_status_id" />
10061006
</index>
@@ -1565,7 +1565,6 @@ Controlled Vocabulary]]></comment>
15651565
<connector name="PostgreSQL" database="PostgreSQL" driver_class="org.postgresql.Driver" driver_jar="postgresql-9.2-1003.jdbc3.jar" host="localhost" port="5432" instance="qiita_test" user="mcdonadt" schema_mapping="" />
15661566
<layout id="Layout669806" name="qiita" show_relation_columns="y" >
15671567
<entity schema="qiita" name="controlled_vocab_values" color="d0def5" x="45" y="1545" />
1568-
<entity schema="qiita" name="investigation" color="c0d4f3" x="2100" y="255" />
15691568
<entity schema="qiita" name="investigation_study" color="c0d4f3" x="2100" y="405" />
15701569
<entity schema="qiita" name="job_results_filepath" color="c0d4f3" x="405" y="855" />
15711570
<entity schema="qiita" name="analysis_job" color="d0def5" x="285" y="930" />
@@ -1635,10 +1634,11 @@ Controlled Vocabulary]]></comment>
16351634
<entity schema="qiita" name="analysis" color="d0def5" x="225" y="720" />
16361635
<entity schema="qiita" name="study_experimental_factor" color="c0d4f3" x="2100" y="495" />
16371636
<entity schema="qiita" name="study_pmid" color="c0d4f3" x="2145" y="600" />
1638-
<entity schema="qiita" name="portal_type" color="c0d4f3" x="1995" y="660" />
16391637
<entity schema="qiita" name="study" color="d0def5" x="1815" y="60" />
16401638
<entity schema="qiita" name="processed_data" color="d0def5" x="1275" y="960" />
1641-
<entity schema="qiita" name="processed_data_status" color="c0d4f3" x="1530" y="1050" />
1639+
<entity schema="qiita" name="investigation" color="c0d4f3" x="2100" y="255" />
1640+
<entity schema="qiita" name="processed_data_status" color="c0d4f3" x="1500" y="1050" />
1641+
<entity schema="qiita" name="portal_type" color="c0d4f3" x="1995" y="660" />
16421642
<group name="Group_analyses" color="c4e0f9" >
16431643
<comment>analysis tables</comment>
16441644
<entity schema="qiita" name="analysis" />

0 commit comments

Comments
 (0)