Skip to content

Commit 1a2c955

Browse files
committed
Fixing all prep template tests
1 parent cc2be6b commit 1a2c955

File tree

2 files changed

+160
-899
lines changed

2 files changed

+160
-899
lines changed

qiita_db/metadata_template/prep_template.py

Lines changed: 61 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -8,43 +8,31 @@
88

99
from __future__ import division
1010
from future.builtins import zip
11-
from future.utils import viewitems, PY3
1211
from copy import deepcopy
1312
from os.path import join
1413
from time import strftime
15-
from functools import partial
16-
from os.path import basename
17-
from future.utils.six import StringIO
1814

19-
import pandas as pd
20-
import numpy as np
21-
import warnings
2215
from skbio.util import find_duplicates
23-
from skbio.io.util import open_file
2416

2517
from qiita_core.exceptions import IncompetentQiitaDeveloperError
26-
from qiita_db.exceptions import (QiitaDBDuplicateError, QiitaDBColumnError,
27-
QiitaDBUnknownIDError, QiitaDBNotImplementedError,
28-
QiitaDBDuplicateHeaderError, QiitaDBError,
29-
QiitaDBWarning, QiitaDBExecutionError)
30-
from qiita_db.base import QiitaObject
18+
from qiita_db.exceptions import (QiitaDBColumnError, QiitaDBUnknownIDError,
19+
QiitaDBDuplicateHeaderError, QiitaDBError,
20+
QiitaDBExecutionError)
3121
from qiita_db.sql_connection import SQLConnectionHandler
3222
from qiita_db.ontology import Ontology
33-
from qiita_db.util import (exists_table, get_table_cols,
34-
convert_to_id,
35-
convert_from_id, get_mountpoint, insert_filepaths,
36-
scrub_data, infer_status)
37-
from qiita_db.study import Study
38-
from qiita_db.data import RawData
39-
from qiita_db.logger import LogEntry
23+
from qiita_db.util import (convert_to_id, convert_from_id, get_mountpoint,
24+
infer_status)
4025
from .base_metadata_template import BaseSample, MetadataTemplate
4126
from .util import (as_python_types, get_invalid_sample_names, get_datatypes,
42-
prefix_sample_names_with_id)
27+
prefix_sample_names_with_id, load_template_to_dataframe)
4328

44-
if PY3:
45-
from string import ascii_letters as letters, digits
46-
else:
47-
from string import letters, digits
29+
30+
TARGET_GENE_DATA_TYPES = ['16S', '18S', 'ITS']
31+
REQUIRED_TARGET_GENE_COLS = {'barcodesequence', 'linkerprimersequence',
32+
'run_prefix', 'library_construction_protocol',
33+
'experiment_design_description', 'platform'}
34+
RENAME_COLS_DICT = {'barcode': 'barcodesequence',
35+
'primer': 'linkerprimersequence'}
4836

4937

5038
class PrepSample(BaseSample):
@@ -163,98 +151,73 @@ def create(cls, md_template, raw_data, study, data_type,
163151

164152
# We need to check for some special columns that are not present in
165153
# the database but are required depending on the data type.
166-
missing = cls._check_special_columns(md_template, data_type_str)
154+
missing = cls._check_template_special_columns(md_template,
155+
data_type_str)
156+
if missing:
157+
raise QiitaDBColumnError("Missing columns: %s"
158+
% ', '.join(missing))
167159

168160
# Get some useful information from the metadata template
169161
sample_ids = md_template.index.tolist()
170-
num_samples = len(sample_ids)
171-
172-
# Get the required columns from the DB
173-
db_cols = get_table_cols(cls._table, conn_handler)
174-
175-
# Remove the sample_id and study_id columns
176-
db_cols.remove('sample_id')
177-
db_cols.remove(cls._id_column)
178-
179-
# Retrieve the headers of the metadata template
180162
headers = list(md_template.keys())
181163

182-
# Check that md_template has the required columns
183-
remaining = set(db_cols).difference(headers)
184-
missing = missing.union(remaining)
185-
missing = missing.difference(cls.translate_cols_dict)
186-
if missing:
187-
raise QiitaDBColumnError("Missing columns: %s"
188-
% ', '.join(missing))
189-
190164
# Insert the metadata template
191165
# We need the prep_id for multiple calls below, which currently is not
192166
# supported by the queue system. Thus, executing this outside the queue
167+
sql = """INSERT INTO qiita.prep_template
168+
(data_type_id, raw_data_id, investigation_type)
169+
VALUES (%s, %s, %s)
170+
RETURNING prep_template_id"""
193171
prep_id = conn_handler.execute_fetchone(
194-
"INSERT INTO qiita.prep_template (data_type_id, raw_data_id, "
195-
"investigation_type) VALUES (%s, %s, %s) RETURNING "
196-
"prep_template_id", (data_type_id, raw_data.id,
197-
investigation_type))[0]
172+
sql, (data_type_id, raw_data.id, investigation_type))[0]
198173

199174
# Insert values on required columns
200-
values = _as_python_types(md_template, db_cols)
201-
values.insert(0, sample_ids)
202-
values.insert(0, [prep_id] * num_samples)
203-
values = [v for v in zip(*values)]
204-
conn_handler.add_to_queue(
205-
queue_name,
206-
"INSERT INTO qiita.{0} ({1}, sample_id, {2}) "
207-
"VALUES (%s, %s, {3})".format(
208-
cls._table, cls._id_column, ', '.join(db_cols),
209-
', '.join(['%s'] * len(db_cols))),
210-
values, many=True)
175+
values = [(prep_id, s_id) for s_id in sample_ids]
176+
sql = "INSERT INTO qiita.{0} ({1}, sample_id) VALUES (%s, %s)".format(
177+
cls._table, cls._id_column)
178+
conn_handler.add_to_queue(queue_name, sql, values, many=True)
211179

212180
# Insert rows on *_columns table
213-
headers = list(set(headers).difference(db_cols))
214-
datatypes = _get_datatypes(md_template.ix[:, headers])
181+
datatypes = get_datatypes(md_template.ix[:, headers])
215182
# psycopg2 requires a list of tuples, in which each tuple is a set
216183
# of values to use in the string formatting of the query. We have all
217184
# the values in different lists (but in the same order) so use zip
218185
# to create the list of tuples that psycopg2 requires.
219-
values = [
220-
v for v in zip([prep_id] * len(headers), headers, datatypes)]
221-
conn_handler.add_to_queue(
222-
queue_name,
223-
"INSERT INTO qiita.{0} ({1}, column_name, column_type) "
224-
"VALUES (%s, %s, %s)".format(cls._column_table, cls._id_column),
225-
values, many=True)
186+
values = [(prep_id, h, d) for h, d in zip(headers, datatypes)]
187+
sql = """INSERT INTO qiita.{0} ({1}, column_name, column_type)
188+
VALUES (%s, %s, %s)""".format(cls._column_table,
189+
cls._id_column)
190+
conn_handler.add_to_queue(queue_name, sql, values, many=True)
226191

227192
# Create table with custom columns
228193
table_name = cls._table_name(prep_id)
229194
column_datatype = ["%s %s" % (col, dtype)
230195
for col, dtype in zip(headers, datatypes)]
231196
conn_handler.add_to_queue(
232197
queue_name,
233-
"CREATE TABLE qiita.{0} (sample_id varchar, "
234-
"{1})".format(table_name, ', '.join(column_datatype)))
198+
"CREATE TABLE qiita.{0} (sample_id varchar, {1})".format(
199+
table_name, ', '.join(column_datatype)))
235200

236201
# Insert values on custom table
237-
values = _as_python_types(md_template, headers)
202+
values = as_python_types(md_template, headers)
238203
values.insert(0, sample_ids)
239204
values = [v for v in zip(*values)]
240-
conn_handler.add_to_queue(
241-
queue_name,
242-
"INSERT INTO qiita.{0} (sample_id, {1}) "
243-
"VALUES (%s, {2})".format(table_name, ", ".join(headers),
244-
', '.join(["%s"] * len(headers))),
245-
values, many=True)
205+
sql = "INSERT INTO qiita.{0} (sample_id, {1}) VALUES (%s, {2})".format(
206+
table_name, ", ".join(headers), ', '.join(["%s"] * len(headers)))
207+
conn_handler.add_to_queue(queue_name, sql, values, many=True)
246208

247209
try:
248210
conn_handler.execute_queue(queue_name)
249211
except Exception:
250212
# Clean up row from qiita.prep_template
251213
conn_handler.execute(
252-
"DELETE FROM qiita.prep_template where "
253-
"{0} = %s".format(cls._id_column), (prep_id,))
214+
"DELETE FROM qiita.prep_template WHERE {0} = %s".format(
215+
cls._id_column),
216+
(prep_id,))
254217

255218
# Check if sample IDs present here but not in sample template
256-
sql = ("SELECT sample_id from qiita.required_sample_info WHERE "
257-
"study_id = %s")
219+
sql = """SELECT sample_id FROM qiita.required_sample_info
220+
WHERE study_id = %s"""
258221
# Get list of study sample IDs, prep template sample IDs,
259222
# and their intersection
260223
prep_samples = set(md_template.index.values)
@@ -426,11 +389,11 @@ def raw_data(self):
426389
@property
427390
def preprocessed_data(self):
428391
conn_handler = SQLConnectionHandler()
429-
prep_datas = conn_handler.execute_fetchall(
392+
prep_data = conn_handler.execute_fetchall(
430393
"SELECT preprocessed_data_id FROM "
431394
"qiita.prep_template_preprocessed_data WHERE prep_template_id=%s",
432395
(self.id,))
433-
return [x[0] for x in prep_datas]
396+
return [x[0] for x in prep_data]
434397

435398
@property
436399
def preprocessing_status(self):
@@ -549,9 +512,19 @@ def create_qiime_mapping_file(self, prep_template_fp):
549512
'description': 'Description',
550513
}
551514

515+
sql = """SELECT filepath_id, filepath
516+
FROM qiita.filepath
517+
JOIN qiita.sample_template_filepath
518+
USING (filepath_id)
519+
WHERE study_id=%s
520+
ORDER BY filepath_id DESC"""
521+
552522
# getting the latest sample template
553-
_, sample_template_fp = SampleTemplate(
554-
self.study_id).get_filepaths()[0]
523+
conn_handler = SQLConnectionHandler()
524+
sample_template_fname = conn_handler.execute_fetchall(
525+
sql, (self.study_id,))[0][1]
526+
_, fp = get_mountpoint('templates')[0]
527+
sample_template_fp = join(fp, sample_template_fname)
555528

556529
# reading files via pandas
557530
st = load_template_to_dataframe(sample_template_fp)
@@ -562,8 +535,9 @@ def create_qiime_mapping_file(self, prep_template_fp):
562535
if not pt_sample_names.issubset(st_sample_names):
563536
raise ValueError(
564537
"Prep template is not a sub set of the sample template, files:"
565-
"%s %s - samples: %s" % (sample_template_fp, prep_template_fp,
566-
str(pt_sample_names-st_sample_names)))
538+
"%s %s - samples: %s"
539+
% (sample_template_fp, prep_template_fp,
540+
str(pt_sample_names - st_sample_names)))
567541

568542
mapping = pt.join(st, lsuffix="_prep")
569543
mapping.rename(columns=rename_cols, inplace=True)
@@ -580,7 +554,6 @@ def create_qiime_mapping_file(self, prep_template_fp):
580554
mapping = mapping[new_cols]
581555

582556
# figuring out the filepath for the QIIME map file
583-
_id, fp = get_mountpoint('templates')[0]
584557
filepath = join(fp, '%d_prep_%d_qiime_%s.txt' % (self.study_id,
585558
self.id, strftime("%Y%m%d-%H%M%S")))
586559

0 commit comments

Comments
 (0)