-
Couldn't load subscription status.
- Fork 79
Adding archive #2449
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding archive #2449
Changes from all commits
7e5d60f
e0b6e31
7b9009e
8beb629
5c33745
42fa986
aa900f1
7e060fc
8571fd0
7ac2994
667e751
ddc2595
9cda47e
0db78d7
e8894c4
ed07b7d
20a86a8
a5d216f
1b99b87
6c2a4ea
39a5a9a
0888b44
376d78f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,181 @@ | ||
| # ----------------------------------------------------------------------------- | ||
| # Copyright (c) 2014--, The Qiita Development Team. | ||
| # | ||
| # Distributed under the terms of the BSD 3-clause License. | ||
| # | ||
| # The full license is in the file LICENSE, distributed with this software. | ||
| # ----------------------------------------------------------------------------- | ||
|
|
||
| from __future__ import division | ||
|
|
||
| import qiita_db as qdb | ||
|
|
||
|
|
||
class Archive(qdb.base.QiitaObject):
    r"""Extra information for any features stored in a BIOM Artifact

    Methods
    -------
    insert_from_artifact
    get_merging_scheme_from_job
    retrieve_feature_values

    See Also
    --------
    qiita_db.QiitaObject
    """

    @classmethod
    def _inserting_main_steps(cls, ms, features):
        """Insert (or reuse) a merging scheme and upsert its feature values.

        Parameters
        ----------
        ms : str
            The name of the merging scheme the features belong to
        features : dict {str: str}
            A dictionary of the features and the values to be stored
        """
        with qdb.sql_connection.TRN:
            # insert the merging scheme only if it doesn't exist yet; the
            # INSERT ... SELECT ... WHERE NOT EXISTS form keeps this safe
            # inside a single transaction
            sql = """INSERT INTO qiita.archive_merging_scheme
                        (archive_merging_scheme)
                     SELECT %s WHERE NOT EXISTS (
                        SELECT 1 FROM qiita.archive_merging_scheme
                        WHERE archive_merging_scheme = %s)"""
            qdb.sql_connection.TRN.add(sql, [ms, ms])
            sql = """SELECT archive_merging_scheme_id
                     FROM qiita.archive_merging_scheme
                     WHERE archive_merging_scheme = %s"""
            qdb.sql_connection.TRN.add(sql, [ms])
            amsi = qdb.sql_connection.TRN.execute_fetchlast()

            # archive_upsert is a DB-side function (see the SQL patch) that
            # updates the value if the (scheme, feature) pair exists and
            # inserts it otherwise
            vals = [[amsi, _id, val] for _id, val in features.items()]
            qdb.sql_connection.TRN.add(
                "SELECT archive_upsert(%s, %s, %s)", vals, many=True)
            qdb.sql_connection.TRN.execute()

    @classmethod
    def insert_from_artifact(cls, artifact, features):
        r"""Inserts new features to the database based on a given artifact

        Parameters
        ----------
        artifact : qiita_db.artifact.Artifact
            The artifact from which the features were generated
        features : dict {str: str}
            A dictionary of the features and the values to be stored

        Raises
        ------
        ValueError
            If the Artifact type is not BIOM
            If the artifact doesn't have a biom filepath
        """
        # everything runs inside one transaction because the property
        # accesses below issue DB calls of their own
        with qdb.sql_connection.TRN:
            atype = artifact.artifact_type
            if atype != 'BIOM':
                raise ValueError(
                    "To archive artifact must be BIOM but %s" % atype)

            bfps = [fp for _, fp, fpt in artifact.filepaths if fpt == 'biom']
            if not bfps:
                raise ValueError("The artifact has no biom files")

            # [0] as it returns a list
            ms = qdb.util.get_artifacts_information(
                [artifact.id])[0]['algorithm']

            cls._inserting_main_steps(ms, features)

    @classmethod
    def get_merging_scheme_from_job(cls, job):
        r"""Computes the merging scheme of a given job

        Parameters
        ----------
        job : qiita_db.processing_job.ProcessingJob
            The job to compute the merging scheme for

        Returns
        -------
        str
            The merging scheme: '<command (params)> | <parent algorithm>'
        """
        # everything runs inside one transaction because the property
        # accesses below issue DB calls of their own
        with qdb.sql_connection.TRN:
            acmd = job.command
            ms = acmd.merging_scheme

            # 1. cleaning aparams - the parameters of the main artifact/job
            temp = acmd.optional_parameters.copy()
            temp.update(acmd.required_parameters)
            # list: cause it can be tuple or lists
            # [0]: the first value is the parameter type
            tparams = job.parameters.values
            aparams = ','.join(
                ['%s: %s' % (k, tparams[k]) for k, v in temp.items()
                 if list(v)[0] != 'artifact' and k in ms['parameters']])
            # in theory we could check here for the filepath merging but
            # as the files haven't been created we don't have this info.
            # Additionally, based on the current functionality, this is not
            # important as normally the difference between files is just
            # an additional filtering step
            if aparams:
                cname = "%s (%s)" % (acmd.name, aparams)
            else:
                cname = acmd.name

            # 2. cleaning pparams - the parameters of the parent artifact
            # [0] getting the attributes from the first parent
            pcmd = job.input_artifacts[0].processing_parameters.command
            palgorithm = 'N/A'
            if pcmd is not None:
                pms = pcmd.merging_scheme
                palgorithm = pcmd.name
                if pms['parameters']:
                    pass
                    # ToDo: Archive
                    # here we need to check for the parent parameters
                    # pparams = ','.join(
                    #     ['%s: %s' % (k, tparams[k]) for k, v in temp.items()
                    #      if list(v)[0] != 'artifact' and k in ms['parameters']])
                    #
                    # params = ','.join(['%s: %s' % (k, pparams[k])
                    #                    for k in ms['parameters']])
                    # palgorithm = "%s (%s)" % (palgorithm, params)
                    #
            algorithm = '%s | %s' % (cname, palgorithm)

            return algorithm

    @classmethod
    def retrieve_feature_values(cls, archive_merging_scheme=None,
                                features=None):
        r"""Retrieves all features/values from the archive

        Parameters
        ----------
        archive_merging_scheme : optional, str
            The name of the archive_merging_scheme to retrieve
        features : optional, iterable of str
            If given, restrict the result to these features

        Returns
        -------
        dict {str: str}
            The features and their stored values

        Notes
        -----
        If archive_merging_scheme is None it will return all
        feature values
        """
        with qdb.sql_connection.TRN:
            # build the WHERE clause dynamically from the optional filters;
            # the values themselves are always passed as bound parameters
            extras = []
            vals = []
            if archive_merging_scheme is not None:
                extras.append("""archive_merging_scheme = %s""")
                vals.append(archive_merging_scheme)
            if features is not None:
                extras.append("""archive_feature IN %s""")
                vals.append(tuple(features))

            sql = """SELECT archive_feature, archive_feature_value
                     FROM qiita.archive_feature_value
                     LEFT JOIN qiita.archive_merging_scheme
                        USING (archive_merging_scheme_id) {0}
                     ORDER BY archive_merging_scheme, archive_feature"""

            if extras:
                sql = sql.format('WHERE ' + ' AND '.join(extras))
                qdb.sql_connection.TRN.add(sql, vals)
            else:
                qdb.sql_connection.TRN.add(sql.format(''))

            return {k: v for k, v in
                    qdb.sql_connection.TRN.execute_fetchindex()}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
-- December 27th, 2017
-- Creating archive feature tables

-- One row per distinct merging scheme (e.g. the command/parameter
-- combination that produced a set of archived features).
CREATE TABLE qiita.archive_merging_scheme (
	archive_merging_scheme_id    bigserial  NOT NULL,
	archive_merging_scheme       varchar  NOT NULL,
	CONSTRAINT pk_merging_scheme PRIMARY KEY ( archive_merging_scheme_id )
 ) ;

-- Feature -> value mapping, scoped by merging scheme; the composite
-- primary key guarantees a single value per (scheme, feature) pair.
CREATE TABLE qiita.archive_feature_value (
	archive_merging_scheme_id    bigint  NOT NULL,
	archive_feature              varchar  NOT NULL,
	archive_feature_value        varchar  NOT NULL,
	CONSTRAINT idx_archive_feature_value PRIMARY KEY ( archive_merging_scheme_id, archive_feature )
 ) ;

-- Supports lookups of all features for a given merging scheme.
CREATE INDEX idx_archive_feature_value_0 ON qiita.archive_feature_value ( archive_merging_scheme_id ) ;

ALTER TABLE qiita.archive_feature_value ADD CONSTRAINT fk_archive_feature_value FOREIGN KEY ( archive_merging_scheme_id ) REFERENCES qiita.archive_merging_scheme( archive_merging_scheme_id );

-- Concurrency-safe UPDATE-then-INSERT upsert loop (predates native
-- INSERT ... ON CONFLICT availability in the deployed PostgreSQL).
-- taken from https://goo.gl/YtSvz2
CREATE OR REPLACE FUNCTION archive_upsert(amsi INT, af VARCHAR, afv VARCHAR) RETURNS VOID AS $$
BEGIN
    LOOP
        -- first try to update the key
        UPDATE qiita.archive_feature_value SET archive_feature_value = afv WHERE archive_merging_scheme_id = amsi AND archive_feature = af;
        IF found THEN
            RETURN;
        END IF;
        -- not there, so try to insert the key
        -- if someone else inserts the same key concurrently,
        -- we could get a unique-key failure
        BEGIN
            INSERT INTO qiita.archive_feature_value (archive_merging_scheme_id, archive_feature, archive_feature_value) VALUES (amsi, af, afv);
            RETURN;
        EXCEPTION WHEN unique_violation THEN
            -- Do nothing, and loop to try the UPDATE again.
        END;
    END LOOP;
END;
$$
LANGUAGE plpgsql;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Long story short: there is a bug in conda that the Qiime2 guys reported and this is the best way to install everything. See that these changes are in master and are passing.