Skip to content

Commit 54f63e9

Browse files
committed
Merge pull request #900 from josenavas/issue-855
Issue 855
2 parents 7a398ed + 3bca14e commit 54f63e9

34 files changed

+3118
-2472
lines changed

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
language: python
2+
sudo: false
23
env:
34
global:
45
- PYTHON_VERSION=2.7

qiita_db/data.py

Lines changed: 175 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,11 @@
8585
from .base import QiitaObject
8686
from .logger import LogEntry
8787
from .sql_connection import SQLConnectionHandler
88-
from .exceptions import QiitaDBError, QiitaDBUnknownIDError
88+
from .exceptions import QiitaDBError, QiitaDBUnknownIDError, QiitaDBStatusError
8989
from .util import (exists_dynamic_table, insert_filepaths, convert_to_id,
9090
convert_from_id, purge_filepaths, get_filepath_id,
91-
get_mountpoint, move_filepaths_to_upload_folder)
91+
get_mountpoint, move_filepaths_to_upload_folder,
92+
infer_status)
9293

9394

9495
class BaseData(QiitaObject):
@@ -615,6 +616,58 @@ def remove_filepath(self, fp):
615616
# Delete the files, if they are not used anywhere
616617
purge_filepaths(conn_handler)
617618

619+
def status(self, study):
    """Infer the status of this raw data as seen from a given study

    Parameters
    ----------
    study : Study
        The study in whose context the raw data status is requested

    Returns
    -------
    str
        The status inferred by `infer_status` from the statuses of the
        processed data derived from this raw data within `study`

    Raises
    ------
    QiitaDBStatusError
        If this raw data id is not listed in ``study.raw_data()``

    Notes
    -----
    A raw data object can be shared by several studies, so it can hold
    a different status in each of them. The study is therefore required
    to select which processed data are taken into account: only those
    linked both to this raw data (through its prep templates and
    preprocessed data) and to the given study.
    """
    # Guard clause: the raw data must be attached to the study
    attached = self._id in study.raw_data()
    if not attached:
        raise QiitaDBStatusError(
            "The study %s does not have access to the raw data %s"
            % (study.id, self.id))

    # Walk processed data -> preprocessed data -> prep template -> raw
    # data, restricted to the processed data owned by the study
    sql = """SELECT processed_data_status
             FROM qiita.processed_data_status pds
               JOIN qiita.processed_data pd
                 USING (processed_data_status_id)
               JOIN qiita.preprocessed_processed_data ppd_pd
                 USING (processed_data_id)
               JOIN qiita.prep_template_preprocessed_data pt_ppd
                 USING (preprocessed_data_id)
               JOIN qiita.prep_template pt
                 USING (prep_template_id)
               JOIN qiita.raw_data rd
                 USING (raw_data_id)
               JOIN qiita.study_processed_data spd
                 USING (processed_data_id)
             WHERE pt.raw_data_id=%s AND spd.study_id=%s"""
    handler = SQLConnectionHandler()
    status_rows = handler.execute_fetchall(sql, (self._id, study.id))

    return infer_status(status_rows)
670+
618671

619672
class PreprocessedData(BaseData):
620673
r"""Object for dealing with preprocessed data
@@ -995,6 +1048,34 @@ def processing_status(self, state):
9951048
"UPDATE qiita.{0} SET processing_status=%s WHERE "
9961049
"preprocessed_data_id=%s".format(self._table), (state, self.id))
9971050

1051+
@property
def status(self):
    """The status of the preprocessed data

    Returns
    -------
    str
        The status inferred by `infer_status` from the processed data
        generated from this preprocessed data

    Notes
    -----
    A preprocessed data has no status of its own: it is derived from
    the statuses of the processed data generated from it. If no
    processed data has been generated from this preprocessed data, the
    status is 'sandbox'.
    """
    # Collect the status of every processed data derived from this object
    sql = """SELECT processed_data_status
             FROM qiita.processed_data_status pds
               JOIN qiita.processed_data pd
                 USING (processed_data_status_id)
               JOIN qiita.preprocessed_processed_data ppd_pd
                 USING (processed_data_id)
             WHERE ppd_pd.preprocessed_data_id=%s"""
    handler = SQLConnectionHandler()
    status_rows = handler.execute_fetchall(sql, (self._id,))

    return infer_status(status_rows)
1078+
9981079

9991080
class ProcessedData(BaseData):
10001081
r"""Object for dealing with processed data
@@ -1020,6 +1101,61 @@ class ProcessedData(BaseData):
10201101
_study_processed_table = "study_processed_data"
10211102
_preprocessed_processed_table = "preprocessed_processed_data"
10221103

1104+
@classmethod
def get_by_status(cls, status):
    """Returns the ids of all the ProcessedData with the given status

    Parameters
    ----------
    status : str
        Status to search for

    Returns
    -------
    set of int
        The processed data ids that match the given status; an empty
        set if the query returns nothing
    """
    sql = """SELECT processed_data_id FROM qiita.processed_data pd
             JOIN qiita.processed_data_status pds
                USING (processed_data_status_id)
             WHERE pds.processed_data_status=%s"""
    rows = SQLConnectionHandler().execute_fetchall(sql, (status,))

    # Each row holds a single column, the id; a falsy result (no rows)
    # collapses to the empty set
    return set(row[0] for row in (rows or ()))
1130+
1131+
@classmethod
def get_by_status_grouped_by_study(cls, status):
    """Returns the ProcessedData ids with the given status, per study

    Parameters
    ----------
    status : str
        Status to search for

    Returns
    -------
    dict of list of int
        A dictionary keyed by study id whose values are the processed
        data ids, in ascending order, that belong to that study and match
        the given status
    """
    # The aggregation is done in the database: one row per study, with
    # the sorted processed data ids packed in an array
    sql = """SELECT spd.study_id,
                array_agg(pd.processed_data_id ORDER BY pd.processed_data_id)
             FROM qiita.processed_data pd
                JOIN qiita.processed_data_status pds
                    USING (processed_data_status_id)
                JOIN qiita.study_processed_data spd
                    USING (processed_data_id)
             WHERE pds.processed_data_status = %s
             GROUP BY spd.study_id;"""
    handler = SQLConnectionHandler()
    study_rows = handler.execute_fetchall(sql, (status,))

    # Every row is a (study_id, ids) pair, so it maps directly to a dict
    return dict(study_rows)
1158+
10231159
@classmethod
10241160
def create(cls, processed_params_table, processed_params_id, filepaths,
10251161
preprocessed_data=None, study=None, processed_date=None,
@@ -1173,3 +1309,40 @@ def processed_date(self):
11731309
return conn_handler.execute_fetchone(
11741310
"SELECT processed_date FROM qiita.{0} WHERE "
11751311
"processed_data_id=%s".format(self._table), (self.id,))[0]
1312+
1313+
@property
def status(self):
    """The status of the processed data

    Returns
    -------
    str
        The value of `processed_data_status` linked to this processed
        data
    """
    sql = """SELECT pds.processed_data_status
             FROM qiita.processed_data_status pds
               JOIN qiita.processed_data pd
                 USING (processed_data_status_id)
             WHERE pd.processed_data_id=%s"""
    row = SQLConnectionHandler().execute_fetchone(sql, (self._id,))
    # The query selects a single column
    return row[0]

@status.setter
def status(self, status):
    """Set the status value

    Parameters
    ----------
    status : str
        The new status

    Raises
    ------
    QiitaDBStatusError
        If the processed data status is public
    """
    # Public processed data is locked: refuse any status change
    if self.status == 'public':
        raise QiitaDBStatusError(
            "Illegal operation on public processed data")

    handler = SQLConnectionHandler()

    # Translate the status name into its id, reusing the same connection
    new_status_id = convert_to_id(status, 'processed_data_status',
                                  conn_handler=handler)

    update_sql = """UPDATE qiita.{0} SET processed_data_status_id = %s
                    WHERE processed_data_id=%s""".format(self._table)
    handler.execute(update_sql, (new_status_id, self._id))

qiita_db/meta_util.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,8 @@ def get_accessible_filepath_ids(user):
8181
return set(f[0] for f in fpids)
8282

8383
# First, the studies
84-
# There are public, private, and shared studies
85-
study_ids = Study.get_by_status('public') | user.user_studies | \
86-
user.shared_studies
84+
# There are private and shared studies
85+
study_ids = user.user_studies | user.shared_studies
8786

8887
filepath_ids = set()
8988
for study_id in study_ids:
@@ -107,7 +106,6 @@ def get_accessible_filepath_ids(user):
107106
for rdid in study.raw_data():
108107
for pt_id in RawData(rdid).prep_templates:
109108
# related to https://github.com/biocore/qiita/issues/596
110-
# and https://github.com/biocore/qiita/issues/554
111109
if PrepTemplate.exists(pt_id):
112110
for _id, _ in PrepTemplate(pt_id).get_filepaths():
113111
prep_fp_ids.append(_id)
@@ -118,6 +116,40 @@ def get_accessible_filepath_ids(user):
118116
in SampleTemplate(study_id).get_filepaths()]
119117
filepath_ids.update(sample_fp_ids)
120118

119+
# Next, the public processed data
120+
processed_data_ids = ProcessedData.get_by_status('public')
121+
for pd_id in processed_data_ids:
122+
processed_data = ProcessedData(pd_id)
123+
124+
# Add the filepaths of the processed data
125+
pd_fps = (fpid for fpid, _, _ in processed_data.get_filepaths())
126+
filepath_ids.update(pd_fps)
127+
128+
# Each processed data has a preprocessed data
129+
ppd = PreprocessedData(processed_data.preprocessed_data)
130+
ppd_fps = (fpid for fpid, _, _ in ppd.get_filepaths())
131+
filepath_ids.update(ppd_fps)
132+
133+
# Each preprocessed data has a prep template
134+
pt_id = ppd.prep_template
135+
# related to https://github.com/biocore/qiita/issues/596
136+
if PrepTemplate.exists(pt_id):
137+
pt = PrepTemplate(pt_id)
138+
pt_fps = (fpid for fpid, _ in pt.get_filepaths())
139+
filepath_ids.update(pt_fps)
140+
141+
# Each prep template has a raw data
142+
rd = RawData(pt.raw_data)
143+
rd_fps = (fpid for fpid, _, _ in rd.get_filepaths())
144+
filepath_ids.update(rd_fps)
145+
146+
# And each processed data has a study, which has a sample template
147+
st_id = processed_data.study
148+
if SampleTemplate.exists(st_id):
149+
sample_fp_ids = (_id for _id, _
150+
in SampleTemplate(st_id).get_filepaths())
151+
filepath_ids.update(sample_fp_ids)
152+
121153
# Next, analyses
122154
# Same as before, there are public, private, and shared
123155
analysis_ids = Analysis.get_by_status('public') | user.private_analyses | \

qiita_db/metadata_template.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
from .util import (exists_table, get_table_cols, get_emp_status,
6363
get_required_sample_info_status, convert_to_id,
6464
convert_from_id, get_mountpoint, insert_filepaths,
65-
scrub_data)
65+
scrub_data, infer_status)
6666
from .study import Study
6767
from .data import RawData
6868
from .logger import LogEntry
@@ -2206,6 +2206,36 @@ def create_qiime_mapping_file(self, prep_template_fp):
22062206

22072207
return filepath
22082208

2209+
@property
def status(self):
    """The status of the prep template

    Returns
    -------
    str
        The status inferred by `infer_status` from the processed data
        generated from this prep template

    Notes
    -----
    A prep template has no status of its own: it is derived from the
    statuses of the processed data generated from it (through its
    preprocessed data). If no processed data has been generated from
    this prep template, the status is 'sandbox'.
    """
    # Walk processed data -> preprocessed data -> prep template
    sql = """SELECT processed_data_status
             FROM qiita.processed_data_status pds
               JOIN qiita.processed_data pd
                 USING (processed_data_status_id)
               JOIN qiita.preprocessed_processed_data ppd_pd
                 USING (processed_data_id)
               JOIN qiita.prep_template_preprocessed_data pt_ppd
                 USING (preprocessed_data_id)
             WHERE pt_ppd.prep_template_id=%s"""
    handler = SQLConnectionHandler()
    status_rows = handler.execute_fetchall(sql, (self._id,))

    return infer_status(status_rows)
2238+
22092239

22102240
def load_template_to_dataframe(fn, strip_whitespace=True):
22112241
"""Load a sample or a prep template into a data frame

0 commit comments

Comments
 (0)