-
Notifications
You must be signed in to change notification settings - Fork 79
archive biom: back changes #3202
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
1762459
ed44fb3
a90530b
03cce99
acfa033
6959ceb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -655,6 +655,73 @@ def delete(cls, artifact_id): | |
| sql = "DELETE FROM qiita.artifact WHERE artifact_id IN %s" | ||
| qdb.sql_connection.TRN.add(sql, [all_ids]) | ||
|
|
||
| @classmethod | ||
| def archive(cls, artifact_id): | ||
| """Archive artifact with artifact_id | ||
|
|
||
| Parameters | ||
| ---------- | ||
| artifact_id : int | ||
| The artifact to be archived | ||
|
|
||
| Raises | ||
| ------ | ||
| QiitaDBOperationNotPermittedError | ||
| If the artifact is not public | ||
| If the artifact_type is not BIOM | ||
| If the artifact belongs to an analysis | |
| If the artifact has no parents (raw file) | ||
| """ | ||
| artifact = cls(artifact_id) | ||
|
|
||
| if artifact.visibility != 'public': | ||
| raise qdb.exceptions.QiitaDBOperationNotPermittedError( | ||
| 'Only public artifacts can be archived') | ||
| if artifact.artifact_type != 'BIOM': | ||
| raise qdb.exceptions.QiitaDBOperationNotPermittedError( | ||
| 'Only BIOM artifacts can be archived') | ||
| if artifact.analysis is not None: | ||
| raise qdb.exceptions.QiitaDBOperationNotPermittedError( | ||
| 'Only non analysis artifacts can be archived') | ||
| if not artifact.parents: | ||
| raise qdb.exceptions.QiitaDBOperationNotPermittedError( | ||
| 'Only non raw artifacts can be archived') | ||
|
|
||
| # let's find all ancestors that can be deleted (they have parents and | |
| # no descendants other than themselves and this artifact) and delete | |
| # them | |
|
||
| to_delete = [x for x in artifact.ancestors.nodes() | ||
| if x.id != artifact_id and x.parents and | ||
| not [y for y in x.descendants.nodes() | ||
| if y.id not in (artifact_id, x.id)]] | ||
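| # keep for deletion only the artifacts that cannot be submitted to EBI | |
| # or that have been submitted to VAMPS | |
| # NOTE(review): deleting VAMPS-submitted artifacts looks unintended; | |
| # should this be "and not x.is_submitted_to_vamps"? confirm | |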
| to_delete = [x for x in to_delete if not x.can_be_submitted_to_ebi or | ||
| x.is_submitted_to_vamps] | ||
|
|
||
| # get the ids of the log filepaths so we can delete them | |
| fids = [x['fp_id'] for x in artifact.filepaths | ||
| if x['fp_type'] == 'log'] | ||
|
|
||
| with qdb.sql_connection.TRN: | ||
| artifact._set_visibility('archived', propagate=False) | ||
| sql = 'DELETE FROM qiita.parent_artifact WHERE artifact_id = %s' | ||
| qdb.sql_connection.TRN.add(sql, [artifact_id]) | ||
|
|
||
| sql = '''DELETE FROM qiita.artifact_output_processing_job | ||
| WHERE artifact_id = %s''' | ||
| qdb.sql_connection.TRN.add(sql, [artifact_id]) | ||
|
|
||
| if fids: | ||
| sql = '''DELETE FROM qiita.artifact_filepath | ||
| WHERE filepath_id IN %s''' | ||
| qdb.sql_connection.TRN.add(sql, [tuple(fids)]) | ||
|
|
||
| qdb.sql_connection.TRN.execute() | ||
|
|
||
| # cleaning the extra artifacts | ||
| for x in to_delete: | ||
| x._set_visibility('sandbox', propagate=False) | ||
| cls.delete(x.id) | ||
|
|
||
| @property | ||
| def name(self): | ||
| """The name of the artifact | ||
|
|
@@ -745,18 +812,21 @@ def visibility(self): | |
| qdb.sql_connection.TRN.add(sql, [self.id]) | ||
| return qdb.sql_connection.TRN.execute_fetchlast() | ||
|
|
||
| def _set_visibility(self, value): | ||
| def _set_visibility(self, value, propagate=True): | ||
| "helper method to split validation and actual set of the visibility" | ||
| # In order to correctly propagate the visibility we need to find | ||
| # the root of this artifact and then propagate to all the artifacts | ||
| vis_id = qdb.util.convert_to_id(value, "visibility") | ||
|
|
||
| sql = "SELECT * FROM qiita.find_artifact_roots(%s)" | ||
| qdb.sql_connection.TRN.add(sql, [self.id]) | ||
| root_id = qdb.sql_connection.TRN.execute_fetchlast() | ||
| root = qdb.artifact.Artifact(root_id) | ||
| # these are the ids of all the children from the root | ||
| ids = [a.id for a in root.descendants.nodes()] | ||
| if propagate: | ||
| sql = "SELECT * FROM qiita.find_artifact_roots(%s)" | ||
| qdb.sql_connection.TRN.add(sql, [self.id]) | ||
| root_id = qdb.sql_connection.TRN.execute_fetchlast() | ||
| root = qdb.artifact.Artifact(root_id) | ||
| # these are the ids of all the children from the root | ||
| ids = [a.id for a in root.descendants.nodes()] | ||
| else: | ||
| ids = [self.id] | ||
|
|
||
| sql = """UPDATE qiita.artifact | ||
| SET visibility_id = %s | ||
|
|
@@ -1317,9 +1387,11 @@ def youngest_artifact(self): | |
| sql = """SELECT artifact_id | ||
| FROM qiita.artifact_descendants(%s) | ||
| JOIN qiita.artifact USING (artifact_id) | ||
| WHERE visibility_id NOT IN %s | ||
| ORDER BY generated_timestamp DESC | ||
| LIMIT 1""" | ||
| qdb.sql_connection.TRN.add(sql, [self.id]) | ||
| qdb.sql_connection.TRN.add( | ||
| sql, [self.id, qdb.util.artifact_visibilities_to_skip()]) | ||
| a_id = qdb.sql_connection.TRN.execute_fetchindex() | ||
| # If the current artifact has no children, the previous call will | ||
| # return an empty list, so the youngest artifact in the lineage is | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| -- Jun 8, 2022 | ||
| -- adding the new visibility level: archived | ||
|
|
||
| INSERT INTO qiita.visibility (visibility, visibility_description) VALUES ('archived', 'Archived artifact'); | ||
|
|
||
| -- update function to ignore archived artifacts | ||
| CREATE OR REPLACE FUNCTION qiita.bioms_from_preparation_artifacts(prep_id bigint) RETURNS TEXT AS $$ | ||
| DECLARE | ||
| artifacts TEXT := NULL; | ||
| BEGIN | ||
| SELECT array_to_string(array_agg(artifact_id), ',') INTO artifacts | ||
| FROM qiita.preparation_artifact | ||
| LEFT JOIN qiita.artifact USING (artifact_id) | ||
| LEFT JOIN qiita.artifact_type USING (artifact_type_id) | ||
| LEFT JOIN qiita.software_command USING (command_id) | ||
| LEFT JOIN qiita.software USING (software_id) | ||
| LEFT JOIN qiita.visibility USING (visibility_id) | ||
| WHERE | ||
| prep_template_id = prep_id AND | ||
| artifact_type = 'BIOM' AND | ||
| NOT deprecated AND | ||
| visibility != 'archived'; | ||
| RETURN artifacts; | ||
| END | ||
| $$ LANGUAGE plpgsql; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1726,6 +1726,7 @@ def get_artifacts_information(artifact_ids, only_biom=True): | |
| JOIN qiita.filepath USING (filepath_id) | ||
| WHERE af.artifact_id = a.artifact_id) filepaths ON true | ||
| WHERE a.artifact_id IN %s | ||
| AND a.visibility_id NOT IN %s | ||
| GROUP BY a.artifact_id, a.name, a.command_id, sc.name, | ||
| a.generated_timestamp, dt.data_type, parent_id, | ||
| parent_info.command_id, parent_info.name | ||
|
|
@@ -1774,7 +1775,8 @@ def get_artifacts_information(artifact_ids, only_biom=True): | |
| ps = {} | ||
| algorithm_az = {'': ''} | ||
| PT = qdb.metadata_template.prep_template.PrepTemplate | ||
| qdb.sql_connection.TRN.add(sql, [tuple(artifact_ids)]) | ||
| qdb.sql_connection.TRN.add(sql, [ | ||
| tuple(artifact_ids), qdb.util.artifact_visibilities_to_skip()]) | ||
| for row in qdb.sql_connection.TRN.execute_fetchindex(): | ||
| aid, name, cid, cname, gt, aparams, dt, pid, pcid, pname, \ | ||
| pparams, filepaths, _, prep_template_id = row | ||
|
|
@@ -1950,6 +1952,10 @@ def open_file(filepath_or, *args, **kwargs): | |
| fh.close() | ||
|
|
||
|
|
||
| def artifact_visibilities_to_skip(): | ||
| return tuple([qdb.util.convert_to_id('archived', "visibility")]) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A new function for one line of code seems a little excessive. However I appreciate the readability. Not sure I would suggest unfolding it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried adding as a global variable in the artifact.py and as a member of the Artifact object but turns out that this is not possible because the code will try to initialize those variables when the code is initiated (and there is no database) so it fails, for example. |
||
|
|
||
|
|
||
| def generate_analysis_list(analysis_ids, public_only=False): | ||
| """Get general analysis information | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,21 +20,24 @@ A Study | |
|
|
||
| Qiita’s main entity is the idea of a study. A study can have many samples, with | ||
| many preparations, that have been sequenced several times, Figure 1. | ||
| Additionally, study artifacts have 3 different states: sandboxed, private and | ||
| public. A sandboxed artifact has all operational capabilities in the system | ||
| but is not publicly available, allowing for quick integration with other | ||
| studies but at the same time keeping it private so the user can improve the | ||
| analysis. Once a user decides that is time to make their artifact public they | ||
| can request an administrator to validate their study information and make it | ||
| private and possibly submit to a permanent repository, where it can also be | ||
| kept private until the user wants to make it public. At this stage in Qiita | ||
| Additionally, study artifacts have 5 different states: sandboxed, awaiting_approval, | ||
| private, public and archived. A sandboxed artifact has all operational | ||
| capabilities in the system but is not publicly available, allowing for quick | ||
| integration with other studies but at the same time keeping it private so the | ||
|
||
| user can improve the analysis. Once a user decides that it is time to make their | |
| artifact public they can request an administrator to validate their study information | |
| and make it private ('awaiting_approval' is the intermediate state between | |
| sandboxed and private) and possibly submit to a permanent repository, where it can | |
| also be kept private until the user wants to make it public. At this stage in Qiita | |
| the whole study (including all processed data) is private. This process is | ||
| completely automatic via the Graphical User Interface (GUI). Currently sequence | ||
| data is being deposited for permanent storage to the European Nucleotide | ||
| Archive (ENA), part of the European Bioinformatics Institute (EBI). Finally, | ||
| when the user is ready, usually when the main manuscript of the study is ready | ||
| for publication, the user can request for the artifact to be made public | ||
| public, both in Qiita and the permanent repository, Figure 2. | ||
| both in Qiita and the permanent repository, Figure 2. Later, when new | |
| processing algorithms are available, the older BIOM artifacts are archived for | |
| long-term storage. | |
|
|
||
|
|
||
| .. figure:: images/figure1.png | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
belowns -> belongs