Analysis cart creation #1025
Changes from all commits: 90339fd, 72267a0, 10f077d, f1ca570, e362c25, cfe9ba2, 28f8480, f0c3f80, 6d30936, 4f3756f, b5bf4ed, 68488c7, 90b3de2, bd3d513, f5026f8, 874632b, ddf2ff4, 1dc1440, cf1123f, 15c71dc, 8e129e6, e784006, d7b4778, ba42c6a
```diff
@@ -1303,12 +1303,53 @@ def data_type(self, ret_id=False):
         return data_type[0]

     @property
-    def processed_date(self):
-        """Return the processed date"""
+    def processing_info(self):
+        """Return the processing item and settings used to create the data
+
+        Returns
+        -------
+        dict
+            Parameter settings keyed to the parameter, along with date and
+            algorithm used
+        """
+        # Get processed date and the info for the dynamic table
         conn_handler = SQLConnectionHandler()
-        return conn_handler.execute_fetchone(
-            "SELECT processed_date FROM qiita.{0} WHERE "
-            "processed_data_id=%s".format(self._table), (self.id,))[0]
+        sql = """SELECT processed_date, processed_params_table,
+                 processed_params_id FROM qiita.{0}
+                 WHERE processed_data_id=%s""".format(self._table)
+        static_info = conn_handler.execute_fetchone(sql, (self.id,))
+
+        # Get the info from the dynamic table, including reference used
+        sql = """SELECT * from qiita.{0}
+                 JOIN qiita.reference USING (reference_id)
+                 WHERE processed_params_id = {1}
+                 """.format(static_info['processed_params_table'],
+                            static_info['processed_params_id'])
+        dynamic_info = dict(conn_handler.execute_fetchone(sql))
+
+        # replace reference filepath_ids with full filepaths
+        # figure out what columns have filepaths and what don't
+        ref_fp_cols = {'sequence_filepath', 'taxonomy_filepath',
+                       'tree_filepath'}
+        fp_ids = [str(dynamic_info[col]) for col in ref_fp_cols
+                  if dynamic_info[col] is not None]
+        # Get the filepaths and create dict of fpid to filepath
+        sql = ("SELECT filepath_id, filepath FROM qiita.filepath WHERE "
+               "filepath_id IN ({})").format(','.join(fp_ids))
+        lookup = {fp[0]: fp[1] for fp in conn_handler.execute_fetchall(sql)}
+        # Loop through and replace ids
+        for key in ref_fp_cols:
+            if dynamic_info[key] is not None:
+                dynamic_info[key] = lookup[dynamic_info[key]]
+
+        # add missing info to the dictionary and remove id column info
+        dynamic_info['processed_date'] = static_info['processed_date']
+        dynamic_info['algorithm'] = static_info[
+            'processed_params_table'].split('_')[-1]
+        del dynamic_info['processed_params_id']
+        del dynamic_info['reference_id']
+
+        return dynamic_info

     @property
     def samples(self):
```

Review discussion on the `qiita.filepath` lookup query:

> I think this query can be merged with the previous one so that a single query is done in this entire code block. Would you mind taking a look? I can potentially give it a shot later today if you can't.

> The reason it's this way is that only `sequence_filepath` is a required column, so the other two may not exist. This leads to issues with JOINs that expect all three columns to exist.

> Oh true, thanks!
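For context, here is a hedged sketch of how the new property might be consumed. The `ProcessedData` class/module name, the example id, and the keys shown in the comment are assumptions for illustration; the actual contents depend on the processed-params table and the database.

```python
# Hypothetical usage sketch; class location and returned keys are assumed,
# not taken from this diff.
from qiita_db.data import ProcessedData

pd = ProcessedData(1)
info = pd.processing_info
# 'info' is a flat dict of parameter settings plus the processed date and
# algorithm, with reference filepath ids already swapped for full filepaths,
# e.g. something like:
# {'processed_date': <datetime>, 'algorithm': 'uclust',
#  'sequence_filepath': '/path/to/ref_seqs.fna', 'similarity': 0.97, ...}
for param, value in info.items():
    print("{0}: {1}".format(param, value))
```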
```diff
@@ -0,0 +1,14 @@
+-- March 28, 2015
+-- Add default analyses for all existing users
+DO $do$
+DECLARE
+    eml varchar;
+    aid bigint;
+BEGIN
+FOR eml IN
+    SELECT email FROM qiita.qiita_user
+LOOP
+    INSERT INTO qiita.analysis (email, name, description, dflt, analysis_status_id) VALUES (eml, eml || '-dflt', 'dflt', true, 1) RETURNING analysis_id INTO aid;
+    INSERT INTO qiita.analysis_workflow (analysis_id, step) VALUES (aid, 2);
+END LOOP;
+END $do$;
```

Review comment on the patch:

> I really like this patch. I think we can potentially use some of these SQL tricks to reduce some of the code in the qiita_db objects, which would improve the performance and quality of our code.
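A possible sanity check after applying the patch, sketched with the project's `SQLConnectionHandler` (the module path is an assumption based on the usual qiita_db layout); it only reads back what the patch inserts.

```python
# Hedged sketch: every existing user should now have exactly one default
# analysis named '<email>-dflt'. Module path is assumed, not from the patch.
from qiita_db.sql_connection import SQLConnectionHandler

conn_handler = SQLConnectionHandler()
sql = """SELECT email, analysis_id, name
         FROM qiita.analysis
         WHERE dflt = true"""
for email, analysis_id, name in conn_handler.execute_fetchall(sql):
    # each name should follow the '<email>-dflt' convention from the patch
    print("{0} -> analysis {1} ({2})".format(email, analysis_id, name))
```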
```diff
@@ -49,6 +49,7 @@ class User(QiitaObject):
     info
     user_studies
     shared_studies
+    default_analysis
     private_analyses
     shared_analyses

```

```diff
@@ -224,10 +225,21 @@ def create(cls, email, password, info=None):
         # for sql insertion
         columns = info.keys()
         values = [info[col] for col in columns]
+        queue = "add_user_%s" % email
+        conn_handler.create_queue(queue)
+        # create user
+        sql = "INSERT INTO qiita.{0} ({1}) VALUES ({2})".format(
+            cls._table, ','.join(columns), ','.join(['%s'] * len(values)))
+        conn_handler.add_to_queue(queue, sql, values)
+        # create user default sample holder
+        sql = ("INSERT INTO qiita.analysis "
+               "(email, name, description, dflt, analysis_status_id) "
+               "VALUES (%s, %s, %s, %s, 1)")
+        conn_handler.add_to_queue(queue, sql,
+                                  (email, '%s-dflt' % email, 'dflt', True))
+
+        conn_handler.execute_queue(queue)
+
-        sql = ("INSERT INTO qiita.%s (%s) VALUES (%s)" %
-               (cls._table, ','.join(columns), ','.join(['%s'] * len(values))))
-        conn_handler.execute(sql, values)
         return cls(email)

     @classmethod
```

Review discussion on the `INSERT INTO qiita.analysis` statement:

> This is personal opinion, but I think this highly improves code readability, and even when failures happen they're easier to read on the command line. Instead of plain quotes (`"`), I like to use triple quotes (`"""`) and then align the SQL in an easy-to-read way. In this example:
>
> ```python
> sql = """INSERT INTO qiita.analysis
>              (email, name, description, dflt, analysis_status_id)
>          VALUES (%s, %s, %s, %s, 1)"""
> ```
>
> The cool thing about this is that if the query fails, it also gets formatted on the CLI. It is also a more natural way of reading SQL, as things align better (note the small indentation on the column names). This is not blocking, though.

> I can do this, but it is also something to put in contributing.md, and it needs to be done consistently across the entire codebase. Again, if that's agreeable I will make the change.

> Yeah, it's not in contributing, so no worries if you don't want to change it. I'm doing it as I'm changing other parts of the code, but there is no documentation for it at this point.
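To illustrate the queued create flow, a hedged usage sketch follows; the email and password are made up, and the printed value relies on the `default_analysis` property added further down in this diff.

```python
# Hypothetical usage sketch: both INSERTs run in one queue, so the user row
# and its default "cart" analysis are created together (or not at all).
from qiita_db.user import User

user = User.create('new.person@foo.example', 'Pa$$w0rd')
print(user.default_analysis)  # id of the 'new.person@foo.example-dflt' analysis
```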
```diff
@@ -329,6 +341,13 @@ def info(self, info):
                "email = %s".format(self._table, ','.join(sql_insert)))
         conn_handler.execute(sql, data)

+    @property
+    def default_analysis(self):
+        sql = ("SELECT analysis_id FROM qiita.analysis WHERE email = %s AND "
+               "dflt = true")
+        conn_handler = SQLConnectionHandler()
+        return conn_handler.execute_fetchone(sql, [self._id])[0]
+
     @property
     def sandbox_studies(self):
         """Returns a list of sandboxed study ids owned by the user"""
```
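A small, hedged sketch of what the returned id is for: loading the user's default "cart" analysis. The `Analysis` class location is assumed from the usual qiita_db layout, not stated in this diff.

```python
# Hedged sketch: default_analysis returns an analysis_id, which can be used
# to instantiate the user's default analysis (the cart). Paths are assumed.
from qiita_db.analysis import Analysis
from qiita_db.user import User

cart = Analysis(User('demo@foo.example').default_analysis)
```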
```diff
@@ -360,8 +379,8 @@ def shared_studies(self):
     @property
     def private_analyses(self):
         """Returns a list of private analysis ids owned by the user"""
-        sql = ("Select analysis_id from qiita.analysis WHERE email = %s AND "
-               "analysis_status_id <> 6")
+        sql = ("SELECT analysis_id FROM qiita.analysis "
+               "WHERE email = %s AND dflt = false")
         conn_handler = SQLConnectionHandler()
         analysis_ids = conn_handler.execute_fetchall(sql, (self._id, ))
         return {a[0] for a in analysis_ids}
```
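A hedged sketch of the behavioural change: with the `dflt` flag as the filter, the default cart analysis never shows up among a user's private analyses (the example email is made up).

```python
# Hedged sketch: the default analysis (dflt = true) is excluded from
# private_analyses (dflt = false), so the cart and the user's real analyses
# no longer overlap.
from qiita_db.user import User

user = User('demo@foo.example')
assert user.default_analysis not in user.private_analyses
```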
Review discussion on the dynamic-table query in `processing_info`:

> I'm getting confused by this SQL query; it looks like a simpler form would be enough. I think this query can return only a single value, since there will be only one row with the given processed_params_id.

> Yeah, I was being way too protective about the join screwing up. Changed.