Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 80 additions & 8 deletions qiita_db/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,73 @@ def delete(cls, artifact_id):
sql = "DELETE FROM qiita.artifact WHERE artifact_id IN %s"
qdb.sql_connection.TRN.add(sql, [all_ids])

@classmethod
def archive(cls, artifact_id):
    """Archive the artifact with id `artifact_id`

    The artifact is flagged as 'archived' (without propagating the change
    to the rest of its lineage), its rows in qiita.parent_artifact and
    qiita.artifact_output_processing_job are removed, and the
    qiita.artifact_filepath rows of its 'log' filepaths are deleted.
    Afterwards, the ancestors that are no longer needed are set to
    'sandbox' and deleted via `cls.delete`.

    Parameters
    ----------
    artifact_id : int
        The artifact to be archived

    Raises
    ------
    QiitaDBOperationNotPermittedError
        If the artifact is not public
        If the artifact_type is not BIOM
        If the artifact belongs to an analysis
        If the artifact has no parents (raw file)
    """
    not_permitted = qdb.exceptions.QiitaDBOperationNotPermittedError
    artifact = cls(artifact_id)

    # guard clauses, checked in this exact order
    if artifact.visibility != 'public':
        raise not_permitted('Only public artifacts can be archived')
    if artifact.artifact_type != 'BIOM':
        raise not_permitted('Only BIOM artifacts can be archived')
    if artifact.analysis is not None:
        raise not_permitted('Only non analysis artifacts can be archived')
    if not artifact.parents:
        raise not_permitted('Only non raw artifacts can be archived')

    # An ancestor is disposable when it is not the artifact itself, it has
    # parents (i.e. it is not a raw artifact) and its descendants are only
    # itself and the artifact being archived.
    disposable = [
        ancestor for ancestor in artifact.ancestors.nodes()
        if ancestor.id != artifact_id and ancestor.parents and
        not any(node.id not in (artifact_id, ancestor.id)
                for node in ancestor.descendants.nodes())]
    # NOTE(review): the original comment here read "ignore artifacts that
    # can and has been submitted to EBI", yet the condition keeps for
    # deletion every artifact that cannot be submitted to EBI *or* that is
    # submitted to VAMPS -- confirm that deleting VAMPS-submitted
    # artifacts is intended.
    disposable = [
        ancestor for ancestor in disposable
        if not ancestor.can_be_submitted_to_ebi or
        ancestor.is_submitted_to_vamps]

    # ids of the log filepaths, so their artifact links can be removed
    log_fp_ids = [fp['fp_id'] for fp in artifact.filepaths
                  if fp['fp_type'] == 'log']

    with qdb.sql_connection.TRN:
        artifact._set_visibility('archived', propagate=False)

        # detach the artifact from its parents
        sql = 'DELETE FROM qiita.parent_artifact WHERE artifact_id = %s'
        qdb.sql_connection.TRN.add(sql, [artifact_id])

        # detach the artifact from the job that generated it
        sql = '''DELETE FROM qiita.artifact_output_processing_job
                    WHERE artifact_id = %s'''
        qdb.sql_connection.TRN.add(sql, [artifact_id])

        if log_fp_ids:
            sql = '''DELETE FROM qiita.artifact_filepath
                        WHERE filepath_id IN %s'''
            qdb.sql_connection.TRN.add(sql, [tuple(log_fp_ids)])

        qdb.sql_connection.TRN.execute()

    # cleaning the extra artifacts
    # NOTE(review): indentation was lost in the reviewed view; this loop is
    # assumed to run after the transaction block above -- confirm.
    for ancestor in disposable:
        # presumably moved to 'sandbox' so that delete accepts it -- verify
        ancestor._set_visibility('sandbox', propagate=False)
        cls.delete(ancestor.id)

@property
def name(self):
"""The name of the artifact
Expand Down Expand Up @@ -745,18 +812,21 @@ def visibility(self):
qdb.sql_connection.TRN.add(sql, [self.id])
return qdb.sql_connection.TRN.execute_fetchlast()

def _set_visibility(self, value):
def _set_visibility(self, value, propagate=True):
"helper method to split validation and actual set of the visibility"
# In order to correctly propagate the visibility we need to find
# the root of this artifact and then propagate to all the artifacts
vis_id = qdb.util.convert_to_id(value, "visibility")

sql = "SELECT * FROM qiita.find_artifact_roots(%s)"
qdb.sql_connection.TRN.add(sql, [self.id])
root_id = qdb.sql_connection.TRN.execute_fetchlast()
root = qdb.artifact.Artifact(root_id)
# these are the ids of all the children from the root
ids = [a.id for a in root.descendants.nodes()]
if propagate:
sql = "SELECT * FROM qiita.find_artifact_roots(%s)"
qdb.sql_connection.TRN.add(sql, [self.id])
root_id = qdb.sql_connection.TRN.execute_fetchlast()
root = qdb.artifact.Artifact(root_id)
# these are the ids of all the children from the root
ids = [a.id for a in root.descendants.nodes()]
else:
ids = [self.id]

sql = """UPDATE qiita.artifact
SET visibility_id = %s
Expand Down Expand Up @@ -1317,9 +1387,11 @@ def youngest_artifact(self):
sql = """SELECT artifact_id
FROM qiita.artifact_descendants(%s)
JOIN qiita.artifact USING (artifact_id)
WHERE visibility_id NOT IN %s
ORDER BY generated_timestamp DESC
LIMIT 1"""
qdb.sql_connection.TRN.add(sql, [self.id])
qdb.sql_connection.TRN.add(
sql, [self.id, qdb.util.artifact_visibilities_to_skip()])
a_id = qdb.sql_connection.TRN.execute_fetchindex()
# If the current artifact has no children, the previous call will
# return an empty list, so the youngest artifact in the lineage is
Expand Down
5 changes: 3 additions & 2 deletions qiita_db/metadata_template/prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,8 +548,9 @@ def status(self):
FROM qiita.prep_template
JOIN qiita.artifact USING (artifact_id)
JOIN qiita.visibility USING (visibility_id)
WHERE prep_template_id = %s"""
qdb.sql_connection.TRN.add(sql, [self._id])
WHERE prep_template_id = %s and visibility_id NOT IN %s"""
qdb.sql_connection.TRN.add(
sql, [self._id, qdb.util.artifact_visibilities_to_skip()])

return qdb.util.infer_status(
qdb.sql_connection.TRN.execute_fetchindex())
Expand Down
8 changes: 5 additions & 3 deletions qiita_db/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,9 @@ def status(self):
FROM qiita.visibility
JOIN qiita.artifact USING (visibility_id)
JOIN qiita.study_artifact USING (artifact_id)
WHERE study_id = %s"""
qdb.sql_connection.TRN.add(sql, [self._id])
WHERE study_id = %s and visibility_id NOT IN %s"""
qdb.sql_connection.TRN.add(
sql, [self._id, qdb.util.artifact_visibilities_to_skip()])
return qdb.util.infer_status(
qdb.sql_connection.TRN.execute_fetchindex())

Expand Down Expand Up @@ -1098,8 +1099,9 @@ def artifacts(self, dtype=None, artifact_type=None):
JOIN qiita.data_type USING (data_type_id)
JOIN qiita.study_artifact USING (artifact_id)
JOIN qiita.artifact_type USING (artifact_type_id)
WHERE study_id = %s{0}
WHERE study_id = %s{0} AND visibility_id NOT IN %s
ORDER BY artifact_id""".format(sql_where)
sql_args.append(qdb.util.artifact_visibilities_to_skip())

qdb.sql_connection.TRN.add(sql, sql_args)
return [qdb.artifact.Artifact(aid)
Expand Down
25 changes: 25 additions & 0 deletions qiita_db/support_files/patches/86.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- Jun 8, 2022
-- adding the new visibility level: archived

INSERT INTO qiita.visibility (visibility, visibility_description) VALUES ('archived', 'Archived artifact');

-- update function to ignore archived artifacts
--
-- qiita.bioms_from_preparation_artifacts(prep_id) returns, as a single
-- comma-separated TEXT value, the artifact_ids linked to `prep_id` in
-- qiita.preparation_artifact whose artifact_type is 'BIOM', that are
-- NOT deprecated and whose visibility is not 'archived'.
-- The result is NULL when no artifact qualifies (the aggregate over zero rows
-- yields NULL, which is also the declared initial value).
-- The WHERE clause tests columns coming from the LEFT JOINed tables, so rows
-- with no matching artifact_type / visibility are filtered out as well.
-- NOTE(review): `deprecated` is unqualified; presumably it is the column of
-- qiita.software -- confirm against the schema.
CREATE OR REPLACE FUNCTION qiita.bioms_from_preparation_artifacts(prep_id bigint) RETURNS TEXT AS $$
DECLARE
artifacts TEXT := NULL;
BEGIN
SELECT array_to_string(array_agg(artifact_id), ',') INTO artifacts
FROM qiita.preparation_artifact
LEFT JOIN qiita.artifact USING (artifact_id)
LEFT JOIN qiita.artifact_type USING (artifact_type_id)
LEFT JOIN qiita.software_command USING (command_id)
LEFT JOIN qiita.software USING (software_id)
LEFT JOIN qiita.visibility USING (visibility_id)
WHERE
prep_template_id = prep_id AND
artifact_type = 'BIOM' AND
NOT deprecated AND
visibility != 'archived';
RETURN artifacts;
END
$$ LANGUAGE plpgsql;
35 changes: 35 additions & 0 deletions qiita_db/test/test_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,5 +1361,40 @@ def test_descendants_with_jobs_one_element(self):
self.assertCountEqual(obs, exp)


@qiita_test_checker()
class ArtifactArchiveTests(TestCase):
    def test_archive(self):
        # Exercises Artifact.archive: first the rejection paths, then the
        # archiving of artifacts 4, 5 and 6 one after the other.
        Artifact = qdb.artifact.Artifact
        not_permitted = qdb.exceptions.QiitaDBOperationNotPermittedError

        # starting point: the lineage of artifact 1 is untouched and the
        # information listing reports the four requested artifacts
        remaining = [Artifact(aid) for aid in (1, 2, 3, 4, 5, 6)]
        self.assertCountEqual(Artifact(1).descendants.nodes(), remaining)
        n_listed = len(qdb.util.get_artifacts_information([4, 5, 6, 8]))
        self.assertEqual(4, n_listed)

        # artifact 1 is rejected for not being public ...
        with self.assertRaisesRegex(
                not_permitted, 'Only public artifacts can be archived'):
            Artifact.archive(1)
        Artifact(1).visibility = 'public'

        # ... and, once public, for not being a BIOM
        with self.assertRaisesRegex(
                not_permitted, 'Only BIOM artifacts can be archived'):
            Artifact.archive(1)

        # artifact 8 is rejected for belonging to an analysis
        Artifact(8).visibility = 'public'
        with self.assertRaisesRegex(
                not_permitted,
                'Only non analysis artifacts can be archived'):
            Artifact.archive(8)

        # every archived artifact drops out of the lineage of artifact 1
        for aid in (4, 5, 6):
            Artifact.archive(aid)
            remaining.remove(Artifact(aid))
            self.assertCountEqual(
                Artifact(1).descendants.nodes(), remaining)

        # archived artifacts are no longer listed, so only one is left
        n_listed = len(qdb.util.get_artifacts_information([4, 5, 6, 8]))
        self.assertEqual(1, n_listed)


if __name__ == '__main__':
main()
2 changes: 1 addition & 1 deletion qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ def test_scrub_data_single_quote(self):

def test_get_visibilities(self):
    # get_visibilities must report every visibility level, including the
    # 'archived' level; the stale expected list without it was dead code
    # (immediately overwritten) and has been dropped.
    obs = qdb.util.get_visibilities()
    exp = ['awaiting_approval', 'sandbox', 'private', 'public', 'archived']
    self.assertEqual(obs, exp)

def test_infer_status(self):
Expand Down
8 changes: 7 additions & 1 deletion qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1726,6 +1726,7 @@ def get_artifacts_information(artifact_ids, only_biom=True):
JOIN qiita.filepath USING (filepath_id)
WHERE af.artifact_id = a.artifact_id) filepaths ON true
WHERE a.artifact_id IN %s
AND a.visibility_id NOT IN %s
GROUP BY a.artifact_id, a.name, a.command_id, sc.name,
a.generated_timestamp, dt.data_type, parent_id,
parent_info.command_id, parent_info.name
Expand Down Expand Up @@ -1774,7 +1775,8 @@ def get_artifacts_information(artifact_ids, only_biom=True):
ps = {}
algorithm_az = {'': ''}
PT = qdb.metadata_template.prep_template.PrepTemplate
qdb.sql_connection.TRN.add(sql, [tuple(artifact_ids)])
qdb.sql_connection.TRN.add(sql, [
tuple(artifact_ids), qdb.util.artifact_visibilities_to_skip()])
for row in qdb.sql_connection.TRN.execute_fetchindex():
aid, name, cid, cname, gt, aparams, dt, pid, pcid, pname, \
pparams, filepaths, _, prep_template_id = row
Expand Down Expand Up @@ -1950,6 +1952,10 @@ def open_file(filepath_or, *args, **kwargs):
fh.close()


def artifact_visibilities_to_skip():
    """Visibility ids that artifact queries should leave out

    The id is looked up each time the function is called rather than being
    stored at import time.

    Returns
    -------
    tuple
        One-element tuple holding the value returned by
        ``qdb.util.convert_to_id`` for the 'archived' visibility
    """
    archived_id = qdb.util.convert_to_id('archived', "visibility")
    return (archived_id,)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A new function for one line of code seems a little excessive. However I appreciate the readability. Not sure I would suggest unfolding it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried adding as a global variable in the artifact.py and as a member of the Artifact object but turns out that this is not possible because the code will try to initialize those variables when the code is initiated (and there is no database) so it fails, for example.



def generate_analysis_list(analysis_ids, public_only=False):
"""Get general analysis information

Expand Down
21 changes: 12 additions & 9 deletions qiita_pet/support_files/doc/source/qiita-philosophy/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,24 @@ A Study

Qiita’s main entity is the idea of a study. A study can have many samples, with
many preparations, that have been sequenced several times, Figure 1.
Additionally, study artifacts have 3 different states: sandboxed, private and
public. A sandboxed artifact has all operational capabilities in the system
but is not publicly available, allowing for quick integration with other
studies but at the same time keeping it private so the user can improve the
analysis. Once a user decides that is time to make their artifact public they
can request an administrator to validate their study information and make it
private and possibly submit to a permanent repository, where it can also be
kept private until the user wants to make it public. At this stage in Qiita
Additionally, study artifacts have 5 different states: sandboxed, awaiting_approval,
private, public and archived. A sandboxed artifact has all operational
capabilities in the system but is not publicly available, allowing for quick
integration with other studies but at the same time keeping it private so the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using 'private' in this sentence might be confusing, since it's not the same meaning as the 'private' state for study artifacts. The text on 'awaiting_approval' was also a little confusing. Here are some suggested changes in the context of the text as a whole:


Additionally, study artifacts have 5 different states: sandboxed,
awaiting_approval, private, public and archived. A sandboxed artifact has all
operational capabilities in the system but is not publicly available, allowing
for quick integration with other studies but at the same time keeping it hidden
so the user can improve the analysis. 

Once a user is satisfied with their analysis, they can request to upgrade their
project's status to 'private'; this confers additional benefits to the project,
including permanent space in the repository. During this time, an administrator
will validate their study and its status will change to 'awaiting_approval'.

At this stage in Qiita the whole study (including all processed data) is
private. This process is completely automatic via the Graphical User Interface
(GUI). Currently sequence data is deposited for permanent storage to the
European Nucleotide Archive (ENA), part of the European Bioinformatics
Institute (EBI). When the user is ready, usually when the main manuscript of
the study is ready for publication, the user can request for the artifact to be
made 'public', both in Qiita and the permanent repository, Figure 2. Finally,
when new processing algorithms are available, the older BIOM artifacts are
'archived', for long term storage.

user can improve the analysis. Once a user decides that it is time to make their
artifact public they can request an administrator to validate their study information
and make it private ('awaiting_approval' is the intermediary state between
sandbox and private) and possibly submit to a permanent repository, where it can
also be kept private until the user wants to make it public. At this stage in Qiita
the whole study (including all processed data) is private. This process is
completely automatic via the Graphical User Interface (GUI). Currently sequence
data is being deposited for permanent storage to the European Nucleotide
Archive (ENA), part of the European Bioinformatics Institute (EBI). Then,
when the user is ready, usually when the main manuscript of the study is ready
for publication, the user can request for the artifact to be made
public, both in Qiita and the permanent repository, Figure 2. Finally, when new
processing algorithms are available, the older BIOM artifacts are archived, for
long term storage.


.. figure:: images/figure1.png
Expand Down