Skip to content

Commit 90b3de2

Browse files
committed
merge upstream/master
2 parents 68488c7 + 9350269 commit 90b3de2

File tree

21 files changed

+2673
-2484
lines changed

21 files changed

+2673
-2484
lines changed

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ before_install:
1717
install:
1818
# install a few of the dependencies that pip would otherwise try to install
1919
# when installing scikit-bio
20-
- conda create --yes -n env_name python=$PYTHON_VERSION pip nose flake8 pyzmq networkx pyparsing natsort mock 'pandas>=0.15' 'matplotlib>=1.1.0' 'scipy>0.13.0' 'numpy>= 1.7' 'h5py>=2.3.1' 'future==0.13.0'
20+
- travis_retry conda create --yes -n env_name python=$PYTHON_VERSION pip nose flake8 pyzmq networkx pyparsing natsort mock 'pandas>=0.15' 'matplotlib>=1.1.0' 'scipy>0.13.0' 'numpy>= 1.7' 'h5py>=2.3.1' 'future==0.13.0'
2121
- source activate env_name
2222
- pip install coveralls ipython[all]==2.4.1
2323
- pip install https://github.com/biocore/mustached-octo-ironman/archive/master.zip
24-
- pip install .
24+
- travis_retry pip install .
2525
script:
2626
- export MOI_CONFIG_FP=`pwd`/qiita_core/support_files/config_test.txt
2727
- ipython profile create qiita_general --parallel

qiita_db/data.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,27 @@ def processing_info(self):
13521352

13531353
return dynamic_info
13541354

1355+
@property
1356+
def samples(self):
1357+
"""Return the samples available according to prep template
1358+
1359+
Returns
1360+
-------
1361+
set
1362+
all sample_ids available for the processed data
1363+
"""
1364+
conn_handler = SQLConnectionHandler()
1365+
# Get the prep template id for the dynamic table lookup
1366+
sql = """SELECT ptp.prep_template_id FROM
1367+
qiita.prep_template_preprocessed_data ptp JOIN
1368+
qiita.preprocessed_processed_data ppd USING (preprocessed_data_id)
1369+
WHERE ppd.processed_data_id = %s"""
1370+
prep_id = conn_handler.execute_fetchone(sql, [self._id])[0]
1371+
1372+
# Get samples from dynamic table
1373+
sql = "SELECT sample_id FROM qiita.prep_%d" % prep_id
1374+
return set(s[0] for s in conn_handler.execute_fetchall(sql))
1375+
13551376
@property
13561377
def status(self):
13571378
conn_handler = SQLConnectionHandler()

qiita_db/search.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,15 @@
6565
from pyparsing import (alphas, nums, Word, dblQuotedString, oneOf, Optional,
6666
opAssoc, CaselessLiteral, removeQuotes, Group,
6767
operatorPrecedence, stringEnd)
68+
from collections import defaultdict
69+
70+
import pandas as pd
71+
from future.utils import viewitems
6872

6973
from qiita_db.util import scrub_data, convert_type, get_table_cols
7074
from qiita_db.sql_connection import SQLConnectionHandler
7175
from qiita_db.study import Study
76+
from qiita_db.data import ProcessedData
7277
from qiita_db.exceptions import QiitaDBIncompatibleDatatypeError
7378

7479

@@ -211,6 +216,8 @@ def __call__(self, searchstr, user):
211216
if study_res:
212217
# only add study to results if actually has samples in results
213218
results[sid] = study_res
219+
self.results = results
220+
self.meta_headers = meta_headers
214221
return results, meta_headers
215222

216223
def _parse_study_search_string(self, searchstr,
@@ -304,8 +311,8 @@ def _parse_study_search_string(self, searchstr,
304311

305312
# create the study finding SQL
306313
# remove metadata headers that are in required_sample_info table
307-
meta_headers = meta_headers.difference(self.required_cols).difference(
308-
self.study_cols)
314+
meta_headers = tuple(meta_headers.difference(
315+
self.required_cols).difference(self.study_cols))
309316

310317
# get all study ids that contain all metadata categories searched for
311318
sql = []
@@ -347,3 +354,54 @@ def _parse_study_search_string(self, searchstr,
347354
"r.study_id WHERE %s" %
348355
(','.join(header_info), sql_where))
349356
return study_sql, sample_sql, meta_header_type_lookup.keys()
357+
358+
def filter_by_processed_data(self, datatypes=None):
359+
"""Filters results to what is available in each processed data
360+
361+
Parameters
362+
----------
363+
datatypes : list of str, optional
364+
Datatypes to selectively return. Default all datatypes available
365+
366+
Returns
367+
-------
368+
study_proc_ids : dict of dicts of lists
369+
Processed data ids with samples for each study, in the format
370+
{study_id: {datatype: [proc_id, proc_id, ...], ...}, ...}
371+
proc_data_samples : dict of lists
372+
Samples available in each processed data id, in the format
373+
{proc_data_id: [samp_id1, samp_id2, ...], ...}
374+
samples_meta : dict of pandas DataFrames
375+
metadata for the found samples, keyed by study. Pandas indexed on
376+
sample_id, column headers are the metadata categories searched
377+
over
378+
"""
379+
if datatypes is not None:
380+
# convert to set for easy lookups
381+
datatypes = set(datatypes)
382+
study_proc_ids = {}
383+
proc_data_samples = {}
384+
samples_meta = {}
385+
headers = {c: val for c, val in enumerate(self.meta_headers)}
386+
for study_id, study_meta in viewitems(self.results):
387+
# add metadata to dataframe and dict
388+
# use from_dict because pandas doesn't like cursor objects
389+
samples_meta[study_id] = pd.DataFrame.from_dict(
390+
{s[0]: s[1:] for s in study_meta}, orient='index')
391+
samples_meta[study_id].rename(columns=headers, inplace=True)
392+
# set up study-based data needed
393+
study = Study(study_id)
394+
study_sample_ids = {s[0] for s in study_meta}
395+
study_proc_ids[study_id] = defaultdict(list)
396+
for proc_data_id in study.processed_data():
397+
proc_data = ProcessedData(proc_data_id)
398+
datatype = proc_data.data_type()
399+
# skip processed data if it doesn't fit the given datatypes
400+
if datatypes is not None and datatype not in datatypes:
401+
continue
402+
filter_samps = proc_data.samples.intersection(study_sample_ids)
403+
if filter_samps:
404+
proc_data_samples[proc_data_id] = sorted(filter_samps)
405+
study_proc_ids[study_id][datatype].append(proc_data_id)
406+
407+
return study_proc_ids, proc_data_samples, samples_meta

qiita_db/study.py

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,14 @@
9898
from __future__ import division
9999
from future.utils import viewitems
100100
from copy import deepcopy
101+
from itertools import chain
102+
import warnings
101103

102104
from qiita_core.exceptions import IncompetentQiitaDeveloperError
103105
from .base import QiitaObject
104106
from .exceptions import (QiitaDBStatusError, QiitaDBColumnError, QiitaDBError)
105107
from .util import (check_required_columns, check_table_cols, convert_to_id,
106-
get_environmental_packages, infer_status)
108+
get_environmental_packages, get_table_cols, infer_status)
107109
from .sql_connection import SQLConnectionHandler
108110

109111

@@ -142,7 +144,12 @@ class Study(QiitaObject):
142144
"""
143145
_table = "study"
144146
# The following columns are considered not part of the study info
145-
_non_info = {"email", "study_title"}
147+
_non_info = frozenset(["email", "study_title"])
148+
# The following tables are considered part of info
149+
_info_cols = frozenset(chain(
150+
get_table_cols('study'), get_table_cols('study_status'),
151+
get_table_cols('timeseries_type'), get_table_cols('portal_type'),
152+
get_table_cols('study_pmid')))
146153

147154
def _lock_non_sandbox(self, conn_handler):
148155
"""Raises QiitaDBStatusError if study is non-sandboxed"""
@@ -198,6 +205,45 @@ def get_by_status(cls, status):
198205

199206
return studies
200207

208+
@classmethod
209+
def get_info(cls, study_ids=None, info_cols=None):
210+
"""Returns study data for a set of study_ids
211+
212+
Parameters
213+
----------
214+
study_ids : list of ints, optional
215+
Studies to get information for. Defauls to all studies
216+
info_cols: list of str, optional
217+
Information columns to retrieve. Defaults to all study data
218+
219+
Returns
220+
-------
221+
list of DictCursor
222+
Table-like structure of metadata, one study per row. Can be
223+
accessed as a list of dictionaries, keyed on column name.
224+
"""
225+
if info_cols is None:
226+
info_cols = cls._info_cols
227+
elif not cls._info_cols.issuperset(info_cols):
228+
warnings.warn("Non-info columns passed: %s" % ", ".join(
229+
set(info_cols) - cls._info_cols))
230+
231+
search_cols = ",".join(sorted(cls._info_cols.intersection(info_cols)))
232+
233+
sql = """SELECT {0} FROM (
234+
qiita.study
235+
JOIN qiita.timeseries_type USING (timeseries_type_id)
236+
JOIN qiita.portal_type USING (portal_type_id)
237+
LEFT JOIN (SELECT study_id, array_agg(pmid ORDER BY pmid) as
238+
pmid FROM qiita.study_pmid GROUP BY study_id) sp USING (study_id)
239+
)""".format(search_cols)
240+
if study_ids is not None:
241+
sql = "{0} WHERE study_id in ({1})".format(
242+
sql, ','.join(str(s) for s in study_ids))
243+
244+
conn_handler = SQLConnectionHandler()
245+
return conn_handler.execute_fetchall(sql)
246+
201247
@classmethod
202248
def exists(cls, study_title):
203249
"""Check if a study exists based on study_title, which is unique

qiita_db/support_files/patches/20.sql

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
1-
-- March 28, 2015
2-
-- Add default analyses for all existing users
3-
DO $do$
4-
DECLARE
5-
eml varchar;
6-
aid bigint;
7-
BEGIN
8-
FOR eml IN
9-
SELECT email FROM qiita.qiita_user
10-
LOOP
11-
INSERT INTO qiita.analysis (email, name, description, dflt, analysis_status_id) VALUES (eml, eml || '-dflt', 'dflt', true, 1) RETURNING analysis_id INTO aid;
12-
INSERT INTO qiita.analysis_workflow (analysis_id, step) VALUES (aid, 2);
13-
END LOOP;
14-
END $do$;
1+
-- March 19, 2015
2+
-- Rename columns to be more descriptive and allow easier joins
3+
ALTER TABLE qiita.processed_data_status RENAME COLUMN description TO processed_data_status_description;
4+
ALTER TABLE qiita.portal_type RENAME COLUMN description TO portal_description;
5+
ALTER TABLE qiita.investigation RENAME COLUMN description TO investigation_description;
6+
ALTER TABLE qiita.investigation RENAME COLUMN name TO investigation_name;

qiita_db/support_files/patches/21.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
-- March 28, 2015
2+
-- Add default analyses for all existing users
3+
DO $do$
4+
DECLARE
5+
eml varchar;
6+
aid bigint;
7+
BEGIN
8+
FOR eml IN
9+
SELECT email FROM qiita.qiita_user
10+
LOOP
11+
INSERT INTO qiita.analysis (email, name, description, dflt, analysis_status_id) VALUES (eml, eml || '-dflt', 'dflt', true, 1) RETURNING analysis_id INTO aid;
12+
INSERT INTO qiita.analysis_workflow (analysis_id, step) VALUES (aid, 2);
13+
END LOOP;
14+
END $do$;

qiita_db/support_files/populate_test_db.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ INSERT INTO qiita.study_users (study_id, email) VALUES (1, 'shared@foo.bar');
4949
INSERT INTO qiita.study_pmid (study_id, pmid) VALUES (1, '123456'), (1, '7891011');
5050

5151
-- Insert an investigation
52-
INSERT INTO qiita.investigation (name, description, contact_person_id) VALUES
52+
INSERT INTO qiita.investigation (investigation_name, investigation_description, contact_person_id) VALUES
5353
('TestInvestigation', 'An investigation for testing purposes', 3);
5454

5555
-- Insert investigation_study (link study 1 with investigation 1)

qiita_db/support_files/qiita-db.dbs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -518,8 +518,8 @@
518518
<table name="investigation" >
519519
<comment>Overarching investigation information.An investigation comprises one or more individual studies.</comment>
520520
<column name="investigation_id" type="bigserial" jt="-5" mandatory="y" />
521-
<column name="name" type="varchar" jt="12" mandatory="y" />
522-
<column name="description" type="varchar" jt="12" mandatory="y" >
521+
<column name="investigation_name" type="varchar" jt="12" mandatory="y" />
522+
<column name="investigation_description" type="varchar" jt="12" mandatory="y" >
523523
<comment><![CDATA[Describes the overarching goal of the investigation]]></comment>
524524
</column>
525525
<column name="contact_person_id" type="bigint" jt="-5" />
@@ -682,7 +682,7 @@
682682
<comment>What portals are available to show a study in</comment>
683683
<column name="portal_type_id" type="bigserial" jt="-5" mandatory="y" />
684684
<column name="portal" type="varchar" jt="12" mandatory="y" />
685-
<column name="description" type="varchar" jt="12" mandatory="y" />
685+
<column name="portal_description" type="varchar" jt="12" mandatory="y" />
686686
<index name="pk_portal_type" unique="PRIMARY_KEY" >
687687
<column name="portal_type_id" />
688688
</index>
@@ -1000,7 +1000,7 @@
10001000
<table name="processed_data_status" >
10011001
<column name="processed_data_status_id" type="bigserial" jt="-5" mandatory="y" />
10021002
<column name="processed_data_status" type="varchar" jt="12" mandatory="y" />
1003-
<column name="description" type="varchar" jt="12" mandatory="y" />
1003+
<column name="processed_data_status_description" type="varchar" jt="12" mandatory="y" />
10041004
<index name="pk_study_status" unique="PRIMARY_KEY" >
10051005
<column name="processed_data_status_id" />
10061006
</index>
@@ -1565,7 +1565,6 @@ Controlled Vocabulary]]></comment>
15651565
<connector name="PostgreSQL" database="PostgreSQL" driver_class="org.postgresql.Driver" driver_jar="postgresql-9.2-1003.jdbc3.jar" host="localhost" port="5432" instance="qiita_test" user="mcdonadt" schema_mapping="" />
15661566
<layout id="Layout669806" name="qiita" show_relation_columns="y" >
15671567
<entity schema="qiita" name="controlled_vocab_values" color="d0def5" x="45" y="1545" />
1568-
<entity schema="qiita" name="investigation" color="c0d4f3" x="2100" y="255" />
15691568
<entity schema="qiita" name="investigation_study" color="c0d4f3" x="2100" y="405" />
15701569
<entity schema="qiita" name="job_results_filepath" color="c0d4f3" x="405" y="855" />
15711570
<entity schema="qiita" name="analysis_job" color="d0def5" x="285" y="930" />
@@ -1635,10 +1634,11 @@ Controlled Vocabulary]]></comment>
16351634
<entity schema="qiita" name="analysis" color="d0def5" x="225" y="720" />
16361635
<entity schema="qiita" name="study_experimental_factor" color="c0d4f3" x="2100" y="495" />
16371636
<entity schema="qiita" name="study_pmid" color="c0d4f3" x="2145" y="600" />
1638-
<entity schema="qiita" name="portal_type" color="c0d4f3" x="1995" y="660" />
16391637
<entity schema="qiita" name="study" color="d0def5" x="1815" y="60" />
16401638
<entity schema="qiita" name="processed_data" color="d0def5" x="1275" y="960" />
1641-
<entity schema="qiita" name="processed_data_status" color="c0d4f3" x="1530" y="1050" />
1639+
<entity schema="qiita" name="investigation" color="c0d4f3" x="2100" y="255" />
1640+
<entity schema="qiita" name="processed_data_status" color="c0d4f3" x="1500" y="1050" />
1641+
<entity schema="qiita" name="portal_type" color="c0d4f3" x="1995" y="660" />
16421642
<group name="Group_analyses" color="c4e0f9" >
16431643
<comment>analysis tables</comment>
16441644
<entity schema="qiita" name="analysis" />

0 commit comments

Comments
 (0)