Skip to content

Commit fe3eb14

Browse files
committed
Move common code to base class
1 parent 0a04779 commit fe3eb14

File tree

3 files changed

+80
-110
lines changed

3 files changed

+80
-110
lines changed

qiita_db/metadata_template/base_metadata_template.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
convert_to_id,
5252
get_mountpoint, insert_filepaths)
5353
from qiita_db.logger import LogEntry
54+
from .util import as_python_types, get_datatypes
5455

5556

5657
class BaseSample(QiitaObject):
@@ -519,6 +520,81 @@ def _check_special_columns(cls, md_template, obj):
519520
return missing.union(
520521
cls._check_template_special_columns(md_template, obj))
521522

523+
@classmethod
524+
def _add_common_creation_steps_to_queue(cls, md_template, obj_id,
525+
conn_handler, queue_name):
526+
r"""Adds the common creation steps to the queue in conn_handler
527+
528+
Parameters
529+
----------
530+
md_template : DataFrame
531+
The metadata template file contents indexed by sample ids
532+
obj_id : int
533+
The id of the object being created
534+
conn_handler : SQLConnectionHandler
535+
The connection handler object connected to the DB
536+
queue_name : str
537+
The queue where the SQL statements will be added
538+
"""
539+
# Get some useful information from the metadata template
540+
sample_ids = md_template.index.tolist()
541+
num_samples = len(sample_ids)
542+
headers = list(md_template.keys())
543+
544+
# Get the required columns from the DB
545+
db_cols = get_table_cols(cls._table, conn_handler)
546+
# Remove sample_id and cls._id_column: the INSERT below lists them explicitly
547+
db_cols.remove('sample_id')
548+
db_cols.remove(cls._id_column)
549+
550+
# Insert the values of the required columns into cls._table
551+
values = as_python_types(md_template, db_cols)
552+
values.insert(0, sample_ids)
553+
values.insert(0, [obj_id] * num_samples)
554+
values = [v for v in zip(*values)]
555+
conn_handler.add_to_queue(
556+
queue_name,
557+
"INSERT INTO qiita.{0} ({1}, sample_id, {2}) "
558+
"VALUES (%s, %s, {3})".format(cls._table, cls._id_column,
559+
', '.join(db_cols),
560+
', '.join(['%s'] * len(db_cols))),
561+
values, many=True)
562+
563+
# Insert one row per non-required column into the *_columns table
564+
headers = list(set(headers).difference(db_cols))
565+
datatypes = get_datatypes(md_template.ix[:, headers])
566+
# psycopg2 requires a list of tuples, in which each tuple is a set
567+
# of values to use in the string formatting of the query. We have all
568+
# the values in different lists (but in the same order) so use zip
569+
# to create the list of tuples that psycopg2 requires.
570+
values = [
571+
v for v in zip([obj_id] * len(headers), headers, datatypes)]
572+
conn_handler.add_to_queue(
573+
queue_name,
574+
"INSERT INTO qiita.{0} ({1}, column_name, column_type) "
575+
"VALUES (%s, %s, %s)".format(cls._column_table, cls._id_column),
576+
values, many=True)
577+
578+
# Create table with custom columns
579+
table_name = cls._table_name(obj_id)
580+
column_datatype = ["%s %s" % (col, dtype)
581+
for col, dtype in zip(headers, datatypes)]
582+
conn_handler.add_to_queue(
583+
queue_name,
584+
"CREATE TABLE qiita.{0} (sample_id varchar NOT NULL, {1})".format(
585+
table_name, ', '.join(column_datatype)))
586+
587+
# Insert the custom column values into the table created above
588+
values = as_python_types(md_template, headers)
589+
values.insert(0, sample_ids)
590+
values = [v for v in zip(*values)]
591+
conn_handler.add_to_queue(
592+
queue_name,
593+
"INSERT INTO qiita.{0} (sample_id, {1}) "
594+
"VALUES (%s, {2})".format(table_name, ", ".join(headers),
595+
', '.join(["%s"] * len(headers))),
596+
values, many=True)
597+
522598
@classmethod
523599
def delete(cls, id_):
524600
r"""Deletes the table from the database

qiita_db/metadata_template/prep_template.py

Lines changed: 2 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
# -----------------------------------------------------------------------------
88

99
from __future__ import division
10-
from future.builtins import zip
1110
from copy import deepcopy
1211
from os.path import join
1312
from time import strftime
@@ -24,7 +23,6 @@
2423
convert_from_id, get_mountpoint, infer_status)
2524
from .base_metadata_template import BaseSample, MetadataTemplate
2625
from .util import (get_invalid_sample_names, prefix_sample_names_with_id,
27-
as_python_types, get_datatypes,
2826
load_template_to_dataframe)
2927
from .constants import (TARGET_GENE_DATA_TYPES, RENAME_COLS_DICT,
3028
REQUIRED_TARGET_GENE_COLS)
@@ -152,10 +150,6 @@ def create(cls, md_template, raw_data, study, data_type,
152150
# the database, but depending on the data type are required.
153151
missing = cls._check_special_columns(md_template, data_type_str)
154152

155-
# Get some useful information from the metadata template
156-
sample_ids = md_template.index.tolist()
157-
num_samples = len(sample_ids)
158-
159153
# Get the required columns from the DB
160154
db_cols = get_table_cols(cls._table, conn_handler)
161155

@@ -183,53 +177,8 @@ def create(cls, md_template, raw_data, study, data_type,
183177
"prep_template_id", (data_type_id, raw_data.id,
184178
investigation_type))[0]
185179

186-
# Insert values on required columns
187-
values = as_python_types(md_template, db_cols)
188-
values.insert(0, sample_ids)
189-
values.insert(0, [prep_id] * num_samples)
190-
values = [v for v in zip(*values)]
191-
conn_handler.add_to_queue(
192-
queue_name,
193-
"INSERT INTO qiita.{0} ({1}, sample_id, {2}) "
194-
"VALUES (%s, %s, {3})".format(
195-
cls._table, cls._id_column, ', '.join(db_cols),
196-
', '.join(['%s'] * len(db_cols))),
197-
values, many=True)
198-
199-
# Insert rows on *_columns table
200-
headers = list(set(headers).difference(db_cols))
201-
datatypes = get_datatypes(md_template.ix[:, headers])
202-
# psycopg2 requires a list of tuples, in which each tuple is a set
203-
# of values to use in the string formatting of the query. We have all
204-
# the values in different lists (but in the same order) so use zip
205-
# to create the list of tuples that psycopg2 requires.
206-
values = [
207-
v for v in zip([prep_id] * len(headers), headers, datatypes)]
208-
conn_handler.add_to_queue(
209-
queue_name,
210-
"INSERT INTO qiita.{0} ({1}, column_name, column_type) "
211-
"VALUES (%s, %s, %s)".format(cls._column_table, cls._id_column),
212-
values, many=True)
213-
214-
# Create table with custom columns
215-
table_name = cls._table_name(prep_id)
216-
column_datatype = ["%s %s" % (col, dtype)
217-
for col, dtype in zip(headers, datatypes)]
218-
conn_handler.add_to_queue(
219-
queue_name,
220-
"CREATE TABLE qiita.{0} (sample_id varchar, "
221-
"{1})".format(table_name, ', '.join(column_datatype)))
222-
223-
# Insert values on custom table
224-
values = as_python_types(md_template, headers)
225-
values.insert(0, sample_ids)
226-
values = [v for v in zip(*values)]
227-
conn_handler.add_to_queue(
228-
queue_name,
229-
"INSERT INTO qiita.{0} (sample_id, {1}) "
230-
"VALUES (%s, {2})".format(table_name, ", ".join(headers),
231-
', '.join(["%s"] * len(headers))),
232-
values, many=True)
180+
cls._add_common_creation_steps_to_queue(md_template, prep_id,
181+
conn_handler, queue_name)
233182

234183
try:
235184
conn_handler.execute_queue(queue_name)

qiita_db/metadata_template/sample_template.py

Lines changed: 2 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -279,64 +279,9 @@ def create(cls, md_template, study):
279279
md_template = cls._clean_validate_template(md_template, study.id,
280280
conn_handler)
281281

282-
# Get some useful information from the metadata template
283-
sample_ids = md_template.index.tolist()
284-
num_samples = len(sample_ids)
285-
headers = list(md_template.keys())
286-
287-
# Get the required columns from the DB
288-
db_cols = get_table_cols(cls._table, conn_handler)
289-
# Remove the sample_id and study_id columns
290-
db_cols.remove('sample_id')
291-
db_cols.remove(cls._id_column)
292-
293-
# Insert values on required columns
294-
values = as_python_types(md_template, db_cols)
295-
values.insert(0, sample_ids)
296-
values.insert(0, [study.id] * num_samples)
297-
values = [v for v in zip(*values)]
298-
conn_handler.add_to_queue(
299-
queue_name,
300-
"INSERT INTO qiita.{0} ({1}, sample_id, {2}) "
301-
"VALUES (%s, %s, {3})".format(cls._table, cls._id_column,
302-
', '.join(db_cols),
303-
', '.join(['%s'] * len(db_cols))),
304-
values, many=True)
305-
306-
# Insert rows on *_columns table
307-
headers = list(set(headers).difference(db_cols))
308-
datatypes = get_datatypes(md_template.ix[:, headers])
309-
# psycopg2 requires a list of tuples, in which each tuple is a set
310-
# of values to use in the string formatting of the query. We have all
311-
# the values in different lists (but in the same order) so use zip
312-
# to create the list of tuples that psycopg2 requires.
313-
values = [
314-
v for v in zip([study.id] * len(headers), headers, datatypes)]
315-
conn_handler.add_to_queue(
316-
queue_name,
317-
"INSERT INTO qiita.{0} ({1}, column_name, column_type) "
318-
"VALUES (%s, %s, %s)".format(cls._column_table, cls._id_column),
319-
values, many=True)
320-
321-
# Create table with custom columns
322-
table_name = cls._table_name(study.id)
323-
column_datatype = ["%s %s" % (col, dtype)
324-
for col, dtype in zip(headers, datatypes)]
325-
conn_handler.add_to_queue(
326-
queue_name,
327-
"CREATE TABLE qiita.{0} (sample_id varchar NOT NULL, {1})".format(
328-
table_name, ', '.join(column_datatype)))
282+
cls._add_common_creation_steps_to_queue(md_template, study.id,
283+
conn_handler, queue_name)
329284

330-
# Insert values on custom table
331-
values = as_python_types(md_template, headers)
332-
values.insert(0, sample_ids)
333-
values = [v for v in zip(*values)]
334-
conn_handler.add_to_queue(
335-
queue_name,
336-
"INSERT INTO qiita.{0} (sample_id, {1}) "
337-
"VALUES (%s, {2})".format(table_name, ", ".join(headers),
338-
', '.join(["%s"] * len(headers))),
339-
values, many=True)
340285
conn_handler.execute_queue(queue_name)
341286

342287
# figuring out the filepath of the backup

0 commit comments

Comments
 (0)