Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 72 additions & 67 deletions qiita_db/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def _status_setter_checks(self):

@classmethod
def get_by_status(cls, status):
"""Returns analysis ids for all Analyses with given status
"""Returns all Analyses with given status

Parameters
----------
Expand All @@ -94,7 +94,7 @@ def get_by_status(cls, status):

Returns
-------
set of int
set of Analysis
All analyses in the database with the given status
"""
with qdb.sql_connection.TRN:
Expand All @@ -105,7 +105,9 @@ def get_by_status(cls, status):
JOIN qiita.portal_type USING (portal_type_id)
WHERE status = %s AND portal = %s""".format(cls._table)
qdb.sql_connection.TRN.add(sql, [status, qiita_config.portal])
return set(qdb.sql_connection.TRN.execute_fetchflatten())
return set(
cls(aid)
for aid in qdb.sql_connection.TRN.execute_fetchflatten())

@classmethod
def create(cls, owner, name, description, parent=None, from_default=False):
Expand Down Expand Up @@ -133,7 +135,7 @@ def create(cls, owner, name, description, parent=None, from_default=False):

if from_default:
# insert analysis and move samples into that new analysis
dflt_id = owner.default_analysis
dflt_id = owner.default_analysis.id

sql = """INSERT INTO qiita.{0}
(email, name, description, analysis_status_id)
Expand Down Expand Up @@ -265,14 +267,14 @@ def owner(self):

Returns
-------
str
Name of the Analysis
qiita_db.user.User
The owner of the Analysis
"""
with qdb.sql_connection.TRN:
sql = "SELECT email FROM qiita.{0} WHERE analysis_id = %s".format(
self._table)
qdb.sql_connection.TRN.add(sql, [self._id])
return qdb.sql_connection.TRN.execute_fetchlast()
return qdb.user.User(qdb.sql_connection.TRN.execute_fetchlast())

@property
def name(self):
Expand Down Expand Up @@ -353,21 +355,21 @@ def description(self, description):

@property
def samples(self):
"""The processed data and samples attached to the analysis
"""The artifact and samples attached to the analysis

Returns
-------
dict
Format is {processed_data_id: [sample_id, sample_id, ...]}
Format is {artifact_id: [sample_id, sample_id, ...]}
"""
with qdb.sql_connection.TRN:
sql = """SELECT processed_data_id, sample_id
sql = """SELECT artifact_id, sample_id
FROM qiita.analysis_sample
WHERE analysis_id = %s
ORDER BY processed_data_id"""
ORDER BY artifact_id"""
ret_samples = defaultdict(list)
qdb.sql_connection.TRN.add(sql, [self._id])
# turn into dict of samples keyed to processed_data_id
# turn into dict of samples keyed to artifact
for pid, sample in qdb.sql_connection.TRN.execute_fetchindex():
ret_samples[pid].append(sample)
return ret_samples
Expand All @@ -379,7 +381,7 @@ def dropped_samples(self):
Returns
-------
dict of sets
Format is {processed_data_id: {sample_id, sample_id, ...}, ...}
Format is {artifact_id: {sample_id, sample_id, ...}, ...}
"""
with qdb.sql_connection.TRN:
bioms = self.biom_tables
Expand Down Expand Up @@ -416,8 +418,8 @@ def data_types(self):
with qdb.sql_connection.TRN:
sql = """SELECT DISTINCT data_type
FROM qiita.data_type
JOIN qiita.processed_data USING (data_type_id)
JOIN qiita.analysis_sample USING (processed_data_id)
JOIN qiita.artifact USING (data_type_id)
JOIN qiita.analysis_sample USING (artifact_id)
WHERE analysis_id = %s
ORDER BY data_type"""
qdb.sql_connection.TRN.add(sql, [self._id])
Expand All @@ -436,7 +438,8 @@ def shared_with(self):
sql = """SELECT email FROM qiita.analysis_users
WHERE analysis_id = %s"""
qdb.sql_connection.TRN.add(sql, [self._id])
return qdb.sql_connection.TRN.execute_fetchflatten()
return [qdb.user.User(uid)
for uid in qdb.sql_connection.TRN.execute_fetchflatten()]

@property
def all_associated_filepath_ids(self):
Expand Down Expand Up @@ -564,14 +567,15 @@ def jobs(self):

Returns
-------
list of ints
list of qiita_db.job.Job
Job ids for jobs in analysis. Empty list if no jobs attached.
"""
with qdb.sql_connection.TRN:
sql = """SELECT job_id FROM qiita.analysis_job
WHERE analysis_id = %s""".format(self._table)
qdb.sql_connection.TRN.add(sql, [self._id])
return qdb.sql_connection.TRN.execute_fetchflatten()
return [qdb.job.Job(jid)
for jid in qdb.sql_connection.TRN.execute_fetchflatten()]

@property
def pmid(self):
Expand Down Expand Up @@ -641,11 +645,11 @@ def has_access(self, user):
if user.level in {'superuser', 'admin'}:
return True

return self._id in Analysis.get_by_status('public') | \
return self in Analysis.get_by_status('public') | \
user.private_analyses | user.shared_analyses

def summary_data(self):
"""Return number of studies, processed data, and samples selected
"""Return number of studies, artifacts, and samples selected

Returns
-------
Expand All @@ -655,10 +659,10 @@ def summary_data(self):
with qdb.sql_connection.TRN:
sql = """SELECT
COUNT(DISTINCT study_id) as studies,
COUNT(DISTINCT processed_data_id) as processed_data,
COUNT(DISTINCT artifact_id) as artifacts,
COUNT(DISTINCT sample_id) as samples
FROM qiita.study_processed_data
JOIN qiita.analysis_sample USING (processed_data_id)
FROM qiita.study_artifact
JOIN qiita.analysis_sample USING (artifact_id)
WHERE analysis_id = %s"""
qdb.sql_connection.TRN.add(sql, [self._id])
return dict(qdb.sql_connection.TRN.execute_fetchindex()[0])
Expand Down Expand Up @@ -705,64 +709,64 @@ def add_samples(self, samples):
Parameters
----------
samples : dictionary of lists
samples and the processed data id they come from in form
{processed_data_id: [sample1, sample2, ...], ...}
samples and the artifact id they come from in form
{artifact_id: [sample1, sample2, ...], ...}
"""
with qdb.sql_connection.TRN:
self._lock_check()

for pid, samps in viewitems(samples):
# get previously selected samples for pid and filter them out
for aid, samps in viewitems(samples):
# get previously selected samples for aid and filter them out
sql = """SELECT sample_id
FROM qiita.analysis_sample
WHERE processed_data_id = %s AND analysis_id = %s"""
qdb.sql_connection.TRN.add(sql, [pid, self._id])
WHERE artifact_id = %s AND analysis_id = %s"""
qdb.sql_connection.TRN.add(sql, [aid, self._id])
prev_selected = qdb.sql_connection.TRN.execute_fetchflatten()

select = set(samps).difference(prev_selected)
sql = """INSERT INTO qiita.analysis_sample
(analysis_id, processed_data_id, sample_id)
(analysis_id, artifact_id, sample_id)
VALUES (%s, %s, %s)"""
args = [[self._id, pid, s] for s in select]
args = [[self._id, aid, s] for s in select]
qdb.sql_connection.TRN.add(sql, args, many=True)
qdb.sql_connection.TRN.execute()

def remove_samples(self, proc_data=None, samples=None):
def remove_samples(self, artifacts=None, samples=None):
"""Removes samples from the analysis

Parameters
----------
proc_data : list, optional
processed data ids to remove, default None
artifacts : list, optional
Artifacts to remove, default None
samples : list, optional
sample ids to remove, default None

Notes
-----
When only a list of samples given, the samples will be removed from all
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know you didn't change this, but are the blank lines necessary? In the past we have used `-` at the beginning of each paragraph. Obviously, not blocking.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

processed data ids it is associated with
artifacts it is associated with

When only a list of proc_data given, all samples associated with that
processed data are removed
When only a list of artifacts is given, all samples associated with
that artifact are removed

If both are passed, the given samples are removed from the given
processed data ids
artifacts
"""
with qdb.sql_connection.TRN:
self._lock_check()
if proc_data and samples:
if artifacts and samples:
sql = """DELETE FROM qiita.analysis_sample
WHERE analysis_id = %s
AND processed_data_id = %s
AND artifact_id = %s
AND sample_id = %s"""
# build tuples for what samples to remove from what
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment doesn't make sense; could you rewrite it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

# processed data
args = [[self._id, p, s]
for p, s in product(proc_data, samples)]
elif proc_data:
# artifact
args = [[self._id, a.id, s]
for a, s in product(artifacts, samples)]
elif artifacts:
sql = """DELETE FROM qiita.analysis_sample
WHERE analysis_id = %s AND processed_data_id = %s"""
args = [[self._id, p] for p in proc_data]
WHERE analysis_id = %s AND artifact_id = %s"""
args = [[self._id, a.id] for a in artifacts]
elif samples:
sql = """DELETE FROM qiita.analysis_sample
WHERE analysis_id = %s AND sample_id = %s"""
Expand Down Expand Up @@ -809,13 +813,13 @@ def build_files(self, rarefaction_depth=None):
self._build_biom_tables(samples, rarefaction_depth)

def _get_samples(self):
"""Retrieves dict of samples to proc_data_id for the analysis"""
"""Retrieves dict of samples to artifact_id for the analysis"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you rewrite?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

with qdb.sql_connection.TRN:
sql = """SELECT processed_data_id, array_agg(
sql = """SELECT artifact_id, array_agg(
sample_id ORDER BY sample_id)
FROM qiita.analysis_sample
WHERE analysis_id = %s
GROUP BY processed_data_id"""
GROUP BY artifact_id"""
qdb.sql_connection.TRN.add(sql, [self._id])
return dict(qdb.sql_connection.TRN.execute_fetchindex())

Expand All @@ -826,26 +830,26 @@ def _build_biom_tables(self, samples, rarefaction_depth):
# each data type
new_tables = {dt: None for dt in self.data_types}
base_fp = qdb.util.get_work_base_dir()
for pid, samps in viewitems(samples):
# one biom table attached to each processed data object
proc_data = qdb.data.ProcessedData(pid)
proc_data_fp = proc_data.get_filepaths()[0][1]
table_fp = join(base_fp, proc_data_fp)
for a_id, samps in viewitems(samples):
# one biom table attached to each artifact object
artifact = qdb.artifact.Artifact(a_id)
artifact_fp = artifact.filepaths[0][1]
table_fp = join(base_fp, artifact_fp)
table = load_table(table_fp)
# HACKY WORKAROUND FOR DEMO. Issue # 246
# make sure samples not in biom table are not filtered for
table_samps = set(table.ids())
filter_samps = table_samps.intersection(samps)
# add the metadata column for study the samples come from
study_meta = {'Study': qdb.study.Study(proc_data.study).title,
'Processed_id': proc_data.id}
study_meta = {'Study': artifact.study.title,
'Processed_id': artifact.id}
samples_meta = {sid: study_meta for sid in filter_samps}
# filter for just the wanted samples and merge into new table
# this if/else setup avoids needing a blank table to
# start merges
table.filter(filter_samps, axis='sample', inplace=True)
table.add_metadata(samples_meta, axis='sample')
data_type = proc_data.data_type()
data_type = artifact.data_type
if new_tables[data_type] is None:
new_tables[data_type] = table
else:
Expand Down Expand Up @@ -873,13 +877,11 @@ def _build_mapping_file(self, samples):
sql = """SELECT filepath_id, filepath
FROM qiita.filepath
JOIN qiita.prep_template_filepath USING (filepath_id)
JOIN qiita.prep_template_preprocessed_data
USING (prep_template_id)
JOIN qiita.preprocessed_processed_data
USING (preprocessed_data_id)
JOIN qiita.prep_template USING (prep_template_id)
JOIN qiita.filepath_type USING (filepath_type_id)
WHERE processed_data_id = %s
AND filepath_type = 'qiime_map'
WHERE filepath_type = 'qiime_map'
AND artifact_id IN (SELECT *
FROM qiita.find_artifact_roots(%s))
ORDER BY filepath_id DESC"""
_id, fp = qdb.util.get_mountpoint('templates')[0]
to_concat = []
Expand Down Expand Up @@ -1086,31 +1088,34 @@ def owner(self):
sql = """SELECT email FROM qiita.{0}
WHERE collection_id = %s""".format(self._table)
qdb.sql_connection.TRN.add(sql, [self._id])
return qdb.sql_connection.TRN.execute_fetchlast()
return qdb.user.User(qdb.sql_connection.TRN.execute_fetchlast())

@property
def analyses(self):
with qdb.sql_connection.TRN:
sql = """SELECT analysis_id FROM qiita.{0}
WHERE collection_id = %s""".format(self._analysis_table)
qdb.sql_connection.TRN.add(sql, [self._id])
return qdb.sql_connection.TRN.execute_fetchflatten()
return [Analysis(aid)
for aid in qdb.sql_connection.TRN.execute_fetchflatten()]

@property
def highlights(self):
with qdb.sql_connection.TRN:
sql = """SELECT job_id FROM qiita.{0}
WHERE collection_id = %s""".format(self._highlight_table)
qdb.sql_connection.TRN.add(sql, [self._id])
return qdb.sql_connection.TRN.execute_fetchflatten()
return [qdb.job.Job(jid)
for jid in qdb.sql_connection.TRN.execute_fetchflatten()]

@property
def shared_with(self):
with qdb.sql_connection.TRN:
sql = """SELECT email FROM qiita.{0}
WHERE collection_id = %s""".format(self._share_table)
qdb.sql_connection.TRN.add(sql, [self._id])
return qdb.sql_connection.TRN.execute_fetchflatten()
return [qdb.user.User(uid)
for uid in qdb.sql_connection.TRN.execute_fetchflatten()]

# --- Functions ---
def add_analysis(self, analysis):
Expand Down
Loading