Commit 0744eed

Merge pull request #1032 from josenavas/unify-metadata-creation
Unify metadata creation
2 parents 9068d4c + 73502d0 commit 0744eed

6 files changed: +272 -110 lines changed

qiita_db/metadata_template/base_metadata_template.py

Lines changed: 77 additions & 0 deletions
@@ -51,6 +51,7 @@
     convert_to_id,
     get_mountpoint, insert_filepaths)
 from qiita_db.logger import LogEntry
+from .util import as_python_types, get_datatypes


 class BaseSample(QiitaObject):
@@ -518,6 +519,82 @@ def _check_special_columns(cls, md_template, obj):
         return missing.union(
             cls._check_template_special_columns(md_template, obj))

+    @classmethod
+    def _add_common_creation_steps_to_queue(cls, md_template, obj_id,
+                                            conn_handler, queue_name):
+        r"""Adds the common creation steps to the queue in conn_handler
+
+        Parameters
+        ----------
+        md_template : DataFrame
+            The metadata template file contents indexed by sample ids
+        obj_id : int
+            The id of the object being created
+        conn_handler : SQLConnectionHandler
+            The connection handler object connected to the DB
+        queue_name : str
+            The queue where the SQL statements will be added
+        """
+        cls._check_subclass()
+        # Get some useful information from the metadata template
+        sample_ids = md_template.index.tolist()
+        num_samples = len(sample_ids)
+        headers = list(md_template.keys())
+
+        # Get the required columns from the DB
+        db_cols = get_table_cols(cls._table, conn_handler)
+        # Remove the sample_id and _id_column columns
+        db_cols.remove('sample_id')
+        db_cols.remove(cls._id_column)
+
+        # Insert values on required columns
+        values = as_python_types(md_template, db_cols)
+        values.insert(0, sample_ids)
+        values.insert(0, [obj_id] * num_samples)
+        values = [v for v in zip(*values)]
+        conn_handler.add_to_queue(
+            queue_name,
+            "INSERT INTO qiita.{0} ({1}, sample_id, {2}) "
+            "VALUES (%s, %s, {3})".format(cls._table, cls._id_column,
+                                          ', '.join(db_cols),
+                                          ', '.join(['%s'] * len(db_cols))),
+            values, many=True)
+
+        # Insert rows on *_columns table
+        headers = list(set(headers).difference(db_cols))
+        datatypes = get_datatypes(md_template.ix[:, headers])
+        # psycopg2 requires a list of tuples, in which each tuple is a set
+        # of values to use in the string formatting of the query. We have all
+        # the values in different lists (but in the same order) so use zip
+        # to create the list of tuples that psycopg2 requires.
+        values = [
+            v for v in zip([obj_id] * len(headers), headers, datatypes)]
+        conn_handler.add_to_queue(
+            queue_name,
+            "INSERT INTO qiita.{0} ({1}, column_name, column_type) "
+            "VALUES (%s, %s, %s)".format(cls._column_table, cls._id_column),
+            values, many=True)
+
+        # Create table with custom columns
+        table_name = cls._table_name(obj_id)
+        column_datatype = ["%s %s" % (col, dtype)
+                           for col, dtype in zip(headers, datatypes)]
+        conn_handler.add_to_queue(
+            queue_name,
+            "CREATE TABLE qiita.{0} (sample_id varchar NOT NULL, {1})".format(
+                table_name, ', '.join(column_datatype)))
+
+        # Insert values on custom table
+        values = as_python_types(md_template, headers)
+        values.insert(0, sample_ids)
+        values = [v for v in zip(*values)]
+        conn_handler.add_to_queue(
+            queue_name,
+            "INSERT INTO qiita.{0} (sample_id, {1}) "
+            "VALUES (%s, {2})".format(table_name, ", ".join(headers),
+                                      ', '.join(["%s"] * len(headers))),
+            values, many=True)
+
     @classmethod
     def delete(cls, id_):
         r"""Deletes the table from the database

qiita_db/metadata_template/prep_template.py

Lines changed: 2 additions & 53 deletions
@@ -7,7 +7,6 @@
 # -----------------------------------------------------------------------------

 from __future__ import division
-from future.builtins import zip
 from copy import deepcopy
 from os.path import join
 from time import strftime
@@ -24,7 +23,6 @@
     convert_from_id, get_mountpoint, infer_status)
 from .base_metadata_template import BaseSample, MetadataTemplate
 from .util import (get_invalid_sample_names, prefix_sample_names_with_id,
-                   as_python_types, get_datatypes,
                    load_template_to_dataframe)
 from .constants import (TARGET_GENE_DATA_TYPES, RENAME_COLS_DICT,
                         REQUIRED_TARGET_GENE_COLS)
@@ -152,10 +150,6 @@ def create(cls, md_template, raw_data, study, data_type,
         # the database, but depending on the data type are required.
         missing = cls._check_special_columns(md_template, data_type_str)

-        # Get some useful information from the metadata template
-        sample_ids = md_template.index.tolist()
-        num_samples = len(sample_ids)
-
         # Get the required columns from the DB
         db_cols = get_table_cols(cls._table, conn_handler)

@@ -183,53 +177,8 @@ def create(cls, md_template, raw_data, study, data_type,
             "prep_template_id", (data_type_id, raw_data.id,
                                  investigation_type))[0]

-        # Insert values on required columns
-        values = as_python_types(md_template, db_cols)
-        values.insert(0, sample_ids)
-        values.insert(0, [prep_id] * num_samples)
-        values = [v for v in zip(*values)]
-        conn_handler.add_to_queue(
-            queue_name,
-            "INSERT INTO qiita.{0} ({1}, sample_id, {2}) "
-            "VALUES (%s, %s, {3})".format(
-                cls._table, cls._id_column, ', '.join(db_cols),
-                ', '.join(['%s'] * len(db_cols))),
-            values, many=True)
-
-        # Insert rows on *_columns table
-        headers = list(set(headers).difference(db_cols))
-        datatypes = get_datatypes(md_template.ix[:, headers])
-        # psycopg2 requires a list of tuples, in which each tuple is a set
-        # of values to use in the string formatting of the query. We have all
-        # the values in different lists (but in the same order) so use zip
-        # to create the list of tuples that psycopg2 requires.
-        values = [
-            v for v in zip([prep_id] * len(headers), headers, datatypes)]
-        conn_handler.add_to_queue(
-            queue_name,
-            "INSERT INTO qiita.{0} ({1}, column_name, column_type) "
-            "VALUES (%s, %s, %s)".format(cls._column_table, cls._id_column),
-            values, many=True)
-
-        # Create table with custom columns
-        table_name = cls._table_name(prep_id)
-        column_datatype = ["%s %s" % (col, dtype)
-                           for col, dtype in zip(headers, datatypes)]
-        conn_handler.add_to_queue(
-            queue_name,
-            "CREATE TABLE qiita.{0} (sample_id varchar, "
-            "{1})".format(table_name, ', '.join(column_datatype)))
-
-        # Insert values on custom table
-        values = as_python_types(md_template, headers)
-        values.insert(0, sample_ids)
-        values = [v for v in zip(*values)]
-        conn_handler.add_to_queue(
-            queue_name,
-            "INSERT INTO qiita.{0} (sample_id, {1}) "
-            "VALUES (%s, {2})".format(table_name, ", ".join(headers),
-                                      ', '.join(["%s"] * len(headers))),
-            values, many=True)
+        cls._add_common_creation_steps_to_queue(md_template, prep_id,
+                                                conn_handler, queue_name)

         try:
             conn_handler.execute_queue(queue_name)
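
Note: with the duplicated block gone, PrepTemplate.create() only queues its prep-specific INSERT (which yields prep_id) and then delegates to the shared helper. The helper drives everything off class-level hooks; the sketch below spells out the hooks a prep template subclass needs, with values inferred from the SQL in the new test (treat them as an approximation, not a copy of the real qiita_db definitions):

# Approximate sketch of the class hooks the shared helper reads; the table and
# column names come from the expected SQL in the new test, but the real
# qiita_db class definitions may differ in detail.
class PrepTemplateHooks(object):
    _table = 'common_prep_info'        # required-column table
    _column_table = 'prep_columns'     # bookkeeping table for dynamic columns
    _id_column = 'prep_template_id'    # id column shared by both tables

    @classmethod
    def _table_name(cls, obj_id):
        # per-template dynamic table, e.g. qiita.prep_2 for obj_id == 2
        return 'prep_{0}'.format(obj_id)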

qiita_db/metadata_template/sample_template.py

Lines changed: 2 additions & 57 deletions
@@ -279,64 +279,9 @@ def create(cls, md_template, study):
         md_template = cls._clean_validate_template(md_template, study.id,
                                                    conn_handler)

-        # Get some useful information from the metadata template
-        sample_ids = md_template.index.tolist()
-        num_samples = len(sample_ids)
-        headers = list(md_template.keys())
-
-        # Get the required columns from the DB
-        db_cols = get_table_cols(cls._table, conn_handler)
-        # Remove the sample_id and study_id columns
-        db_cols.remove('sample_id')
-        db_cols.remove(cls._id_column)
-
-        # Insert values on required columns
-        values = as_python_types(md_template, db_cols)
-        values.insert(0, sample_ids)
-        values.insert(0, [study.id] * num_samples)
-        values = [v for v in zip(*values)]
-        conn_handler.add_to_queue(
-            queue_name,
-            "INSERT INTO qiita.{0} ({1}, sample_id, {2}) "
-            "VALUES (%s, %s, {3})".format(cls._table, cls._id_column,
-                                          ', '.join(db_cols),
-                                          ', '.join(['%s'] * len(db_cols))),
-            values, many=True)
-
-        # Insert rows on *_columns table
-        headers = list(set(headers).difference(db_cols))
-        datatypes = get_datatypes(md_template.ix[:, headers])
-        # psycopg2 requires a list of tuples, in which each tuple is a set
-        # of values to use in the string formatting of the query. We have all
-        # the values in different lists (but in the same order) so use zip
-        # to create the list of tuples that psycopg2 requires.
-        values = [
-            v for v in zip([study.id] * len(headers), headers, datatypes)]
-        conn_handler.add_to_queue(
-            queue_name,
-            "INSERT INTO qiita.{0} ({1}, column_name, column_type) "
-            "VALUES (%s, %s, %s)".format(cls._column_table, cls._id_column),
-            values, many=True)
-
-        # Create table with custom columns
-        table_name = cls._table_name(study.id)
-        column_datatype = ["%s %s" % (col, dtype)
-                           for col, dtype in zip(headers, datatypes)]
-        conn_handler.add_to_queue(
-            queue_name,
-            "CREATE TABLE qiita.{0} (sample_id varchar NOT NULL, {1})".format(
-                table_name, ', '.join(column_datatype)))
+        cls._add_common_creation_steps_to_queue(md_template, study.id,
+                                                conn_handler, queue_name)

-        # Insert values on custom table
-        values = as_python_types(md_template, headers)
-        values.insert(0, sample_ids)
-        values = [v for v in zip(*values)]
-        conn_handler.add_to_queue(
-            queue_name,
-            "INSERT INTO qiita.{0} (sample_id, {1}) "
-            "VALUES (%s, {2})".format(table_name, ", ".join(headers),
-                                      ', '.join(["%s"] * len(headers))),
-            values, many=True)
         conn_handler.execute_queue(queue_name)

         # figuring out the filepath of the backup
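
Note: both create() methods now follow the same queue-then-execute pattern: create_queue(), a few add_to_queue() calls (some with many=True), the shared helper, and a single execute_queue(). The toy handler below mimics only the queueing behaviour the new test asserts on (each many=True call expands into one (sql, params) entry per row; parameterless statements are stored as (sql, None)). It is an illustrative stand-in, not the real SQLConnectionHandler:

# Toy in-memory stand-in for the queueing behaviour relied on above; NOT the
# real qiita_db SQLConnectionHandler, just the shape the new test checks.
class ToyQueueHandler(object):
    def __init__(self):
        self.queues = {}

    def create_queue(self, queue_name):
        self.queues[queue_name] = []

    def add_to_queue(self, queue_name, sql, sql_args=None, many=False):
        if many:
            # one queued (sql, params) entry per row, mirroring the expected
            # list in test_add_common_creation_steps_to_queue
            self.queues[queue_name].extend((sql, args) for args in sql_args)
        else:
            self.queues[queue_name].append((sql, sql_args))

    def execute_queue(self, queue_name):
        # the real handler runs the queued statements against Postgres in a
        # single transaction; here we just drain the queue
        return self.queues.pop(queue_name)


handler = ToyQueueHandler()
handler.create_queue("demo")
handler.add_to_queue("demo",
                     "INSERT INTO qiita.prep_columns "
                     "(prep_template_id, column_name, column_type) "
                     "VALUES (%s, %s, %s)",
                     [(2, 'str_column', 'varchar')], many=True)
handler.add_to_queue("demo",
                     "CREATE TABLE qiita.prep_2 (sample_id varchar NOT NULL)")
print(handler.execute_queue("demo"))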

qiita_db/metadata_template/test/test_base_metadata_template.py

Lines changed: 7 additions & 0 deletions
@@ -51,6 +51,13 @@ def test_table_name(self):
         with self.assertRaises(IncompetentQiitaDeveloperError):
             MetadataTemplate._table_name(self.study)

+    def test_add_common_creation_steps_to_queue(self):
+        """_add_common_creation_steps_to_queue raises an error from base class
+        """
+        with self.assertRaises(IncompetentQiitaDeveloperError):
+            MetadataTemplate._add_common_creation_steps_to_queue(
+                None, 1, None, "")
+

 @qiita_test_checker()
 class TestMetadataTemplateReadWrite(TestCase):
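
Note: this test only pins down the observable behaviour: calling the helper on the abstract MetadataTemplate base raises IncompetentQiitaDeveloperError, courtesy of the cls._check_subclass() call at the top of the new method. A hypothetical sketch of such a guard follows; the real _check_subclass lives outside this diff and may well be implemented differently:

# Hypothetical guard sketch; the real qiita_db _check_subclass is not part of
# this diff, so this only illustrates the behaviour the test asserts.
class IncompetentQiitaDeveloperError(Exception):
    """Stand-in for the qiita_db exception of the same name."""


class MetadataTemplateSketch(object):
    _table = None  # abstract bases leave the table hooks unset

    @classmethod
    def _check_subclass(cls):
        if cls._table is None:
            raise IncompetentQiitaDeveloperError(
                "%s is an abstract base class; use a concrete template "
                "subclass instead" % cls.__name__)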

qiita_db/metadata_template/test/test_prep_template.py

Lines changed: 88 additions & 0 deletions
@@ -533,6 +533,94 @@ def test_to_dataframe(self):
             u'samp_size', u'sequencing_meth', u'illumina_technology',
             u'sample_center', u'pcr_primers', u'study_center'})

+    def test_add_common_creation_steps_to_queue(self):
+        """add_common_creation_steps_to_queue adds the correct sql statements
+        """
+        metadata_dict = {
+            '2.SKB8.640193': {'center_name': 'ANL',
+                              'center_project_name': 'Test Project',
+                              'emp_status_id': 1,
+                              'str_column': 'Value for sample 1',
+                              'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
+                              'barcodesequence': 'GTCCGCAAGTTA',
+                              'run_prefix': "s_G1_L001_sequences",
+                              'platform': 'ILLUMINA',
+                              'library_construction_protocol': 'AAAA',
+                              'experiment_design_description': 'BBBB'},
+            '2.SKD8.640184': {'center_name': 'ANL',
+                              'center_project_name': 'Test Project',
+                              'emp_status_id': 1,
+                              'str_column': 'Value for sample 2',
+                              'linkerprimersequence': 'GTGCCAGCMGCCGCGGTAA',
+                              'barcodesequence': 'CGTAGAGCTCTC',
+                              'run_prefix': "s_G1_L001_sequences",
+                              'platform': 'ILLUMINA',
+                              'library_construction_protocol': 'AAAA',
+                              'experiment_design_description': 'BBBB'},
+            }
+        metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
+
+        conn_handler = SQLConnectionHandler()
+        queue_name = "TEST_QUEUE"
+        conn_handler.create_queue(queue_name)
+        PrepTemplate._add_common_creation_steps_to_queue(
+            metadata, 2, conn_handler, queue_name)
+
+        sql_insert_common = (
+            'INSERT INTO qiita.common_prep_info '
+            '(prep_template_id, sample_id, center_name, center_project_name, '
+            'emp_status_id) '
+            'VALUES (%s, %s, %s, %s, %s)')
+        sql_insert_common_params_1 = (2, '2.SKB8.640193', 'ANL',
+                                      'Test Project', 1)
+        sql_insert_common_params_2 = (2, '2.SKD8.640184', 'ANL',
+                                      'Test Project', 1)
+
+        sql_insert_prep_columns = (
+            'INSERT INTO qiita.prep_columns '
+            '(prep_template_id, column_name, column_type) '
+            'VALUES (%s, %s, %s)')
+
+        sql_create_table = (
+            'CREATE TABLE qiita.prep_2 '
+            '(sample_id varchar NOT NULL, str_column varchar, '
+            'run_prefix varchar, barcodesequence varchar, platform varchar, '
+            'linkerprimersequence varchar, '
+            'experiment_design_description varchar, '
+            'library_construction_protocol varchar)')
+
+        sql_insert_dynamic = (
+            'INSERT INTO qiita.prep_2 '
+            '(sample_id, str_column, run_prefix, barcodesequence, platform, '
+            'linkerprimersequence, experiment_design_description, '
+            'library_construction_protocol) '
+            'VALUES (%s, %s, %s, %s, %s, %s, %s, %s)')
+
+        sql_insert_dynamic_params_1 = (
+            '2.SKB8.640193', 'Value for sample 1', 's_G1_L001_sequences',
+            'GTCCGCAAGTTA', 'ILLUMINA', 'GTGCCAGCMGCCGCGGTAA', 'BBBB', 'AAAA')
+        sql_insert_dynamic_params_2 = (
+            '2.SKD8.640184', 'Value for sample 2', 's_G1_L001_sequences',
+            'CGTAGAGCTCTC', 'ILLUMINA', 'GTGCCAGCMGCCGCGGTAA', 'BBBB', 'AAAA')
+
+        exp = [
+            (sql_insert_common, sql_insert_common_params_1),
+            (sql_insert_common, sql_insert_common_params_2),
+            (sql_insert_prep_columns, (2, 'str_column', 'varchar')),
+            (sql_insert_prep_columns, (2, 'run_prefix', 'varchar')),
+            (sql_insert_prep_columns, (2, 'barcodesequence', 'varchar')),
+            (sql_insert_prep_columns, (2, 'platform', 'varchar')),
+            (sql_insert_prep_columns, (2, 'linkerprimersequence', 'varchar')),
+            (sql_insert_prep_columns,
+             (2, 'experiment_design_description', 'varchar')),
+            (sql_insert_prep_columns,
+             (2, 'library_construction_protocol', 'varchar')),
+            (sql_create_table, None),
+            (sql_insert_dynamic, sql_insert_dynamic_params_1),
+            (sql_insert_dynamic, sql_insert_dynamic_params_2)]
+
+        self.assertEqual(conn_handler.queues[queue_name], exp)
+

 @qiita_test_checker()
 class TestPrepTemplateReadWrite(BaseTestPrepTemplate):
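
Note: the expected queue in this test also documents how columns are routed: headers that already exist on common_prep_info (center_name, center_project_name, emp_status_id) go into the required-columns INSERT, and everything else becomes a column of the dynamic qiita.prep_2 table via a set difference. A small standalone sketch of that split (toy header list; sorted only to make the output deterministic):

# Sketch of the column routing performed by the shared helper: required
# columns stay in the common table, the rest feed the per-template table.
headers = ['center_name', 'center_project_name', 'emp_status_id',
           'str_column', 'run_prefix', 'platform', 'barcodesequence']
db_cols = ['center_name', 'center_project_name', 'emp_status_id']

dynamic_cols = list(set(headers).difference(db_cols))
# set() leaves the ordering of the dynamic columns unspecified, which is why
# the expected CREATE TABLE / INSERT statements in the test pin one ordering.
print(sorted(dynamic_cols))
# ['barcodesequence', 'platform', 'run_prefix', 'str_column']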
