
Commit 09bd0f7

Merge pull request #1108 from biocore/cart-branch: Cart branch

2 parents c212708 + 61166af, commit 09bd0f7
28 files changed, +1108 -867 lines

qiita_db/analysis.py

75 additions & 17 deletions

@@ -18,6 +18,7 @@
 # -----------------------------------------------------------------------------
 from __future__ import division
 from collections import defaultdict
+from itertools import product
 from os.path import join

 from future.utils import viewitems
@@ -62,6 +63,7 @@ class Analysis(QiitaStatusObject):
     share
     unshare
     build_files
+    summary_data
     """

     _table = "analysis"
@@ -99,7 +101,7 @@ def get_by_status(cls, status):
         return {x[0] for x in conn_handler.execute_fetchall(sql, (status,))}

     @classmethod
-    def create(cls, owner, name, description, parent=None):
+    def create(cls, owner, name, description, parent=None, from_default=False):
         """Creates a new analysis on the database

         Parameters
@@ -112,23 +114,53 @@ def create(cls, owner, name, description, parent=None):
             Description of the analysis
         parent : Analysis object, optional
             The analysis this one was forked from
+        from_default : bool, optional
+            If True, use the default analysis to populate selected samples.
+            Default False.
         """
+        queue = "create_analysis"
         conn_handler = SQLConnectionHandler()
+        conn_handler.create_queue(queue)
         # TODO after demo: if exists()
-
-        # insert analysis information into table with "in construction" status
-        sql = ("INSERT INTO qiita.{0} (email, name, description, "
-               "analysis_status_id) VALUES (%s, %s, %s, 1) "
-               "RETURNING analysis_id".format(cls._table))
-        a_id = conn_handler.execute_fetchone(
-            sql, (owner.id, name, description))[0]
+        # Needed since issue #292 exists
+        status_id = conn_handler.execute_fetchone(
+            "SELECT analysis_status_id from qiita.analysis_status WHERE "
+            "status = 'in_construction'")[0]
+        if from_default:
+            # insert analysis and move samples into that new analysis
+            dflt_id = owner.default_analysis
+            sql = """INSERT INTO qiita.{0}
+                     (email, name, description, analysis_status_id)
+                     VALUES (%s, %s, %s, %s)
+                     RETURNING analysis_id""".format(cls._table)
+            conn_handler.add_to_queue(queue, sql, (owner.id, name,
+                                                   description, status_id))
+            # MAGIC NUMBER 3: command selection step
+            # needed so we skip the sample selection step
+            sql = """INSERT INTO qiita.analysis_workflow
+                     (analysis_id, step) VALUES (%s, %s)
+                     RETURNING %s"""
+            conn_handler.add_to_queue(queue, sql, ['{0}', 3, '{0}'])
+            sql = """UPDATE qiita.analysis_sample
+                     SET analysis_id = %s
+                     WHERE analysis_id = %s RETURNING %s"""
+            conn_handler.add_to_queue(queue, sql, ['{0}', dflt_id, '{0}'])
+        else:
+            # insert analysis information into table as "in construction"
+            sql = """INSERT INTO qiita.{0}
+                     (email, name, description, analysis_status_id)
+                     VALUES (%s, %s, %s, %s)
+                     RETURNING analysis_id""".format(cls._table)
+            conn_handler.add_to_queue(
+                queue, sql, (owner.id, name, description, status_id))

         # add parent if necessary
         if parent:
             sql = ("INSERT INTO qiita.analysis_chain (parent_id, child_id) "
-                   "VALUES (%s, %s)")
-            conn_handler.execute(sql, (parent.id, a_id))
+                   "VALUES (%s, %s) RETURNING child_id")
+            conn_handler.add_to_queue(queue, sql, [parent.id, '{0}'])

+        a_id = conn_handler.execute_queue(queue)[0]
         return cls(a_id)

     # ---- Properties ----
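
For orientation, a minimal usage sketch of the new queued create path (not part of the diff). The owner email and analysis names are assumptions borrowed from the test fixtures; all queued statements run together when execute_queue is called, so the analysis id is only resolved at the end.

# Sketch only; 'test@foo.bar' and the names below are illustrative assumptions.
from qiita_db.user import User
from qiita_db.analysis import Analysis

owner = User('test@foo.bar')

# Regular path: creates an empty analysis in "in_construction" status.
analysis = Analysis.create(owner, 'my analysis', 'testing the cart branch')

# Cart path: seed the new analysis from the owner's default analysis, so the
# samples already sitting in the "cart" move over and the workflow is
# advanced past the sample-selection step.
cart_analysis = Analysis.create(owner, 'from cart', 'seeded from the cart',
                                from_default=True)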
@@ -470,6 +502,23 @@ def has_access(self, user):
         return self._id in Analysis.get_by_status('public') | \
             user.private_analyses | user.shared_analyses

+    def summary_data(self):
+        """Return number of studies, processed data, and samples selected
+
+        Returns
+        -------
+        dict
+            counts keyed to their relevant type
+        """
+        sql = """SELECT COUNT(DISTINCT study_id) as studies,
+                COUNT(DISTINCT processed_data_id) as processed_data,
+                COUNT(DISTINCT sample_id) as samples
+                FROM qiita.study_processed_data
+                JOIN qiita.analysis_sample USING (processed_data_id)
+                WHERE analysis_id = %s"""
+        conn_handler = SQLConnectionHandler()
+        return dict(conn_handler.execute_fetchone(sql, [self._id]))
+
     def share(self, user):
         """Share the analysis with another user

@@ -511,17 +560,26 @@ def add_samples(self, samples):

         Parameters
         ----------
-        samples : list of tuples of (int, str)
+        samples : dictionary of lists
             samples and the processed data id they come from in form
-            [(processed_data_id, sample_id), ...]
+            {processed_data_id: [sample1, sample2, ...], ...}
         """
         conn_handler = SQLConnectionHandler()
         self._lock_check(conn_handler)
-        sql = ("INSERT INTO qiita.analysis_sample "
-               "(analysis_id, processed_data_id, sample_id) VALUES "
-               "(%s, %s, %s)")
-        conn_handler.executemany(sql, [(self._id, s[0], s[1])
-                                       for s in samples])
+
+        for pid, samps in viewitems(samples):
+            # get previously selected samples for pid and filter them out
+            sql = """SELECT sample_id FROM qiita.analysis_sample
+                     WHERE processed_data_id = %s and analysis_id = %s"""
+            prev_selected = [x[0] for x in
+                             conn_handler.execute_fetchall(sql,
+                                                           (pid, self._id))]
+
+            select = set(samps).difference(prev_selected)
+            sql = ("INSERT INTO qiita.analysis_sample "
+                   "(analysis_id, processed_data_id, sample_id) VALUES "
+                   "({}, %s, %s)".format(self._id))
+            conn_handler.executemany(sql, [x for x in product([pid], select)])

     def remove_samples(self, proc_data=None, samples=None):
         """Removes samples from the analysis

qiita_db/data.py

46 additions & 5 deletions

@@ -1425,12 +1425,53 @@ def data_type(self, ret_id=False):
         return data_type[0]

     @property
-    def processed_date(self):
-        """Return the processed date"""
+    def processing_info(self):
+        """Return the processing item and settings used to create the data
+
+        Returns
+        -------
+        dict
+            Parameter settings keyed to the parameter, along with date and
+            algorithm used
+        """
+        # Get processed date and the info for the dynamic table
         conn_handler = SQLConnectionHandler()
-        return conn_handler.execute_fetchone(
-            "SELECT processed_date FROM qiita.{0} WHERE "
-            "processed_data_id=%s".format(self._table), (self.id,))[0]
+        sql = """SELECT processed_date, processed_params_table,
+                 processed_params_id FROM qiita.{0}
+                 WHERE processed_data_id=%s""".format(self._table)
+        static_info = conn_handler.execute_fetchone(sql, (self.id,))
+
+        # Get the info from the dynamic table, including reference used
+        sql = """SELECT * from qiita.{0}
+                 JOIN qiita.reference USING (reference_id)
+                 WHERE processed_params_id = {1}
+              """.format(static_info['processed_params_table'],
+                         static_info['processed_params_id'])
+        dynamic_info = dict(conn_handler.execute_fetchone(sql))
+
+        # replace reference filepath_ids with full filepaths
+        # figure out what columns have filepaths and what don't
+        ref_fp_cols = {'sequence_filepath', 'taxonomy_filepath',
+                       'tree_filepath'}
+        fp_ids = [str(dynamic_info[col]) for col in ref_fp_cols
+                  if dynamic_info[col] is not None]
+        # Get the filepaths and create dict of fpid to filepath
+        sql = ("SELECT filepath_id, filepath FROM qiita.filepath WHERE "
+               "filepath_id IN ({})").format(','.join(fp_ids))
+        lookup = {fp[0]: fp[1] for fp in conn_handler.execute_fetchall(sql)}
+        # Loop through and replace ids
+        for key in ref_fp_cols:
+            if dynamic_info[key] is not None:
+                dynamic_info[key] = lookup[dynamic_info[key]]
+
+        # add missing info to the dictionary and remove id column info
+        dynamic_info['processed_date'] = static_info['processed_date']
+        dynamic_info['algorithm'] = static_info[
+            'processed_params_table'].split('_')[-1]
+        del dynamic_info['processed_params_id']
+        del dynamic_info['reference_id']
+
+        return dynamic_info

     @property
     def samples(self):
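
To make the new property concrete, a hedged sketch of what a caller might get back; the exact keys depend on which dynamic processed_params_* table the data points at and on the joined reference row, so the dictionary shown is only an illustration.

# Sketch only; assumes the qiita test database. Keys other than
# 'processed_date' and 'algorithm' come from the dynamic params table.
from qiita_db.data import ProcessedData

info = ProcessedData(1).processing_info
# Possible shape (illustrative values):
# {'processed_date': datetime(2012, 10, 1, 9, 30),
#  'algorithm': 'uclust',
#  'similarity': 0.97,
#  'sequence_filepath': '/path/to/refseqs.fna',
#  'taxonomy_filepath': '/path/to/taxonomy.txt',
#  'tree_filepath': '/path/to/tree.tre',
#  ...}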

qiita_db/support_files/patches/21.sql

14 additions & 0 deletions

@@ -0,0 +1,14 @@
+-- March 28, 2015
+-- Add default analyses for all existing users
+DO $do$
+DECLARE
+    eml varchar;
+    aid bigint;
+BEGIN
+FOR eml IN
+    SELECT email FROM qiita.qiita_user
+LOOP
+    INSERT INTO qiita.analysis (email, name, description, dflt, analysis_status_id) VALUES (eml, eml || '-dflt', 'dflt', true, 1) RETURNING analysis_id INTO aid;
+    INSERT INTO qiita.analysis_workflow (analysis_id, step) VALUES (aid, 2);
+END LOOP;
+END $do$;

qiita_db/support_files/patches/22.sql

14 additions & 0 deletions

@@ -0,0 +1,14 @@
+-- April 16, 2015
+-- Add primary key to analysis_sample table, first deleting the duplicates
+-- http://stackoverflow.com/a/9862688
+DO $do$
+BEGIN
+    CREATE TEMP TABLE temp_table
+    ON COMMIT drop AS
+    SELECT analysis_id, processed_data_id, sample_id
+    FROM qiita.analysis_sample GROUP BY analysis_id, processed_data_id, sample_id;
+    DELETE FROM qiita.analysis_sample;
+    INSERT INTO qiita.analysis_sample (analysis_id, processed_data_id, sample_id) SELECT analysis_id, processed_data_id, sample_id FROM temp_table;
+
+    ALTER TABLE qiita.analysis_sample ADD CONSTRAINT pk_analysis_sample PRIMARY KEY ( analysis_id, processed_data_id, sample_id );
+END $do$
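
As a quick check of what the new composite key guarantees, a small sketch (assuming a configured qiita environment with the test database, and that SQLConnectionHandler lives in qiita_db.sql_connection as elsewhere in the code base) that asserts analysis_sample can no longer hold duplicate selections:

# Sketch only; duplicate rows were removed by this patch, and the new
# pk_analysis_sample constraint keeps this query empty from now on.
from qiita_db.sql_connection import SQLConnectionHandler

conn_handler = SQLConnectionHandler()
dups = conn_handler.execute_fetchall(
    """SELECT analysis_id, processed_data_id, sample_id, COUNT(*)
       FROM qiita.analysis_sample
       GROUP BY analysis_id, processed_data_id, sample_id
       HAVING COUNT(*) > 1""")
assert not dups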

qiita_db/support_files/populate_test_db.sql

6 additions & 0 deletions

@@ -420,3 +420,9 @@ INSERT INTO qiita.collection_job (collection_id, job_id) VALUES (1, 1);

 --share collection with shared user
 INSERT INTO qiita.collection_users (email, collection_id) VALUES ('shared@foo.bar', 1);
+
+--add default analysis for users
+INSERT INTO qiita.analysis (email, name, description, dflt, analysis_status_id) VALUES ('test@foo.bar', 'test@foo.bar-dflt', 'dflt', true, 1), ('admin@foo.bar', 'admin@foo.bar-dflt', 'dflt', true, 1), ('shared@foo.bar', 'shared@foo.bar-dflt', 'dflt', true, 1), ('demo@microbio.me', 'demo@microbio.me-dflt', 'dflt', true, 1);
+
+-- Attach samples to analysis
+INSERT INTO qiita.analysis_sample (analysis_id, processed_data_id, sample_id) VALUES (3,1,'1.SKD8.640184'), (3,1,'1.SKB7.640196'), (3,1,'1.SKM9.640192'), (3,1,'1.SKM4.640180')

qiita_db/support_files/qiita-db.dbs

6 additions & 1 deletion

@@ -127,6 +127,11 @@
     <index name="idx_analysis_sample_1" unique="NORMAL" >
         <column name="sample_id" />
     </index>
+    <index name="pk_analysis_sample" unique="PRIMARY_KEY" >
+        <column name="analysis_id" />
+        <column name="processed_data_id" />
+        <column name="sample_id" />
+    </index>
     <fk name="fk_analysis_sample_analysis" to_schema="qiita" to_table="analysis" >
         <fk_column name="analysis_id" pk="analysis_id" />
     </fk>
@@ -1611,7 +1616,6 @@ Controlled Vocabulary]]></comment>
 <entity schema="qiita" name="data_directory" color="b2cdf7" x="840" y="585" />
 <entity schema="qiita" name="term" color="d0def5" x="810" y="1650" />
 <entity schema="qiita" name="common_prep_info" color="d0def5" x="1050" y="165" />
-<entity schema="qiita" name="analysis_sample" color="d0def5" x="45" y="1170" />
 <entity schema="qiita" name="environmental_package" color="b2cdf7" x="2250" y="150" />
 <entity schema="qiita" name="study_environmental_package" color="b2cdf7" x="2250" y="45" />
 <entity schema="qiita" name="timeseries_type" color="c0d4f3" x="1680" y="615" />
@@ -1639,6 +1643,7 @@ Controlled Vocabulary]]></comment>
 <entity schema="qiita" name="investigation" color="c0d4f3" x="2100" y="255" />
 <entity schema="qiita" name="processed_data_status" color="c0d4f3" x="1500" y="1050" />
 <entity schema="qiita" name="portal_type" color="c0d4f3" x="1995" y="660" />
+<entity schema="qiita" name="analysis_sample" color="d0def5" x="45" y="1170" />
 <group name="Group_analyses" color="c4e0f9" >
     <comment>analysis tables</comment>
     <entity schema="qiita" name="analysis" />
