-
Notifications
You must be signed in to change notification settings - Fork 80
Unify metadata creation #1032
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Unify metadata creation #1032
Changes from all commits
fe3eb14
4ae5c62
48d7d95
e5a60f3
cecaf56
510207e
025626b
aff06a3
2ae8609
73502d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,7 @@ | |
convert_to_id, | ||
get_mountpoint, insert_filepaths) | ||
from qiita_db.logger import LogEntry | ||
from .util import as_python_types, get_datatypes | ||
|
||
|
||
class BaseSample(QiitaObject): | ||
|
@@ -518,6 +519,82 @@ def _check_special_columns(cls, md_template, obj): | |
return missing.union( | ||
cls._check_template_special_columns(md_template, obj)) | ||
|
||
@classmethod | ||
def _add_common_creation_steps_to_queue(cls, md_template, obj_id, | ||
conn_handler, queue_name): | ||
r"""Adds the common creation steps to the queue in conn_handler | ||
|
||
Parameters | ||
---------- | ||
md_template : DataFrame | ||
The metadata template file contents indexed by sample ids | ||
obj_id : int | ||
The id of the object being created | ||
conn_handler : SQLConnectionHandler | ||
The connection handler object connected to the DB | ||
queue_name : str | ||
The queue where the SQL statements will be added | ||
""" | ||
cls._check_subclass() | ||
# Get some useful information from the metadata template | ||
sample_ids = md_template.index.tolist() | ||
num_samples = len(sample_ids) | ||
headers = list(md_template.keys()) | ||
|
||
# Get the required columns from the DB | ||
db_cols = get_table_cols(cls._table, conn_handler) | ||
# Remove the sample_id and _id_column columns | ||
db_cols.remove('sample_id') | ||
db_cols.remove(cls._id_column) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there any way _id_column can be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cool |
||
|
||
# Insert values on required columns | ||
values = as_python_types(md_template, db_cols) | ||
values.insert(0, sample_ids) | ||
values.insert(0, [obj_id] * num_samples) | ||
values = [v for v in zip(*values)] | ||
conn_handler.add_to_queue( | ||
queue_name, | ||
"INSERT INTO qiita.{0} ({1}, sample_id, {2}) " | ||
"VALUES (%s, %s, {3})".format(cls._table, cls._id_column, | ||
', '.join(db_cols), | ||
', '.join(['%s'] * len(db_cols))), | ||
values, many=True) | ||
|
||
# Insert rows on *_columns table | ||
headers = list(set(headers).difference(db_cols)) | ||
datatypes = get_datatypes(md_template.ix[:, headers]) | ||
# psycopg2 requires a list of tuples, in which each tuple is a set | ||
# of values to use in the string formatting of the query. We have all | ||
# the values in different lists (but in the same order) so use zip | ||
# to create the list of tuples that psycopg2 requires. | ||
values = [ | ||
v for v in zip([obj_id] * len(headers), headers, datatypes)] | ||
conn_handler.add_to_queue( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Philosophical thing, but I'd add this to the queue after the table is made and filled. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's in a queue, all the changes happen or not, so I don't see the added value. Do you have strong feelings? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, not blocking, just philosophical. |
||
queue_name, | ||
"INSERT INTO qiita.{0} ({1}, column_name, column_type) " | ||
"VALUES (%s, %s, %s)".format(cls._column_table, cls._id_column), | ||
values, many=True) | ||
|
||
# Create table with custom columns | ||
table_name = cls._table_name(obj_id) | ||
column_datatype = ["%s %s" % (col, dtype) | ||
for col, dtype in zip(headers, datatypes)] | ||
conn_handler.add_to_queue( | ||
queue_name, | ||
"CREATE TABLE qiita.{0} (sample_id varchar NOT NULL, {1})".format( | ||
table_name, ', '.join(column_datatype))) | ||
|
||
# Insert values on custom table | ||
values = as_python_types(md_template, headers) | ||
values.insert(0, sample_ids) | ||
values = [v for v in zip(*values)] | ||
conn_handler.add_to_queue( | ||
queue_name, | ||
"INSERT INTO qiita.{0} (sample_id, {1}) " | ||
"VALUES (%s, {2})".format(table_name, ", ".join(headers), | ||
', '.join(["%s"] * len(headers))), | ||
values, many=True) | ||
|
||
@classmethod | ||
def delete(cls, id_): | ||
r"""Deletes the table from the database | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks looks like the function needs the "am I instantiating the base class" test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was thinking on that, but this is a private function and it should be only used in the
create
functions, that already have that test. If you feel strong about it, I can add it though...There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would probably be good to add as a safety layer, since it will also mean a more interpretable error.