7
7
# -----------------------------------------------------------------------------
8
8
9
9
from __future__ import division
10
+ from future .utils import viewvalues
10
11
from os .path import join
11
12
from time import strftime
13
+ from copy import deepcopy
14
+ import warnings
15
+
16
+ import pandas as pd
12
17
13
18
from qiita_core .exceptions import IncompetentQiitaDeveloperError
14
19
from qiita_db .exceptions import (QiitaDBColumnError , QiitaDBUnknownIDError ,
15
- QiitaDBError , QiitaDBExecutionError )
20
+ QiitaDBError , QiitaDBExecutionError ,
21
+ QiitaDBWarning )
16
22
from qiita_db .sql_connection import SQLConnectionHandler
17
23
from qiita_db .ontology import Ontology
18
24
from qiita_db .util import (convert_to_id ,
19
25
convert_from_id , get_mountpoint , infer_status )
20
26
from .base_metadata_template import BaseSample , MetadataTemplate
21
27
from .util import load_template_to_dataframe
22
- from .constants import (TARGET_GENE_DATA_TYPES , RENAME_COLS_DICT ,
23
- REQUIRED_TARGET_GENE_COLS )
28
+ from .constants import (TARGET_GENE_DATA_TYPES , PREP_TEMPLATE_COLUMNS ,
29
+ PREP_TEMPLATE_COLUMNS_TARGET_GENE )
24
30
25
31
26
32
class PrepSample (BaseSample ):
@@ -66,8 +72,9 @@ class PrepTemplate(MetadataTemplate):
66
72
_table_prefix = "prep_"
67
73
_column_table = "prep_columns"
68
74
_id_column = "prep_template_id"
69
- translate_cols_dict = {'emp_status_id' : 'emp_status' }
70
75
_sample_cls = PrepSample
76
+ _fp_id = convert_to_id ("prep_template" , "filepath_type" )
77
+ _filepath_table = 'prep_template_filepath'
71
78
72
79
@classmethod
73
80
def create (cls , md_template , raw_data , study , data_type ,
@@ -116,8 +123,13 @@ def create(cls, md_template, raw_data, study, data_type,
116
123
data_type_id = convert_to_id (data_type , "data_type" , conn_handler )
117
124
data_type_str = data_type
118
125
126
+ pt_cols = PREP_TEMPLATE_COLUMNS
127
+ if data_type_str in TARGET_GENE_DATA_TYPES :
128
+ pt_cols = deepcopy (PREP_TEMPLATE_COLUMNS )
129
+ pt_cols .update (PREP_TEMPLATE_COLUMNS_TARGET_GENE )
130
+
119
131
md_template = cls ._clean_validate_template (md_template , study .id ,
120
- data_type_str , conn_handler )
132
+ pt_cols )
121
133
122
134
# Insert the metadata template
123
135
# We need the prep_id for multiple calls below, which currently is not
@@ -140,7 +152,7 @@ def create(cls, md_template, raw_data, study, data_type,
140
152
"{0} = %s" .format (cls ._id_column ), (prep_id ,))
141
153
142
154
# Check if sample IDs present here but not in sample template
143
- sql = ("SELECT sample_id from qiita.required_sample_info WHERE "
155
+ sql = ("SELECT sample_id from qiita.study_sample WHERE "
144
156
"study_id = %s" )
145
157
# Get list of study sample IDs, prep template study IDs,
146
158
# and their intersection
@@ -181,40 +193,6 @@ def validate_investigation_type(self, investigation_type):
181
193
"Choose from: %s" % (investigation_type ,
182
194
', ' .join (terms )))
183
195
184
- @classmethod
185
- def _check_template_special_columns (cls , md_template , data_type ):
186
- r"""Checks for special columns based on obj type
187
-
188
- Parameters
189
- ----------
190
- md_template : DataFrame
191
- The metadata template file contents indexed by sample ids
192
- data_type : str
193
- The data_type of the template.
194
-
195
- Returns
196
- -------
197
- set
198
- The set of missing columns
199
-
200
- Notes
201
- -----
202
- Sometimes people use different names for the same columns. We just
203
- rename them to use the naming that we expect, so this is normalized
204
- across studies.
205
- """
206
- # We only have column requirements if the data type of the raw data
207
- # is one of the target gene types
208
- missing_cols = set ()
209
- if data_type in TARGET_GENE_DATA_TYPES :
210
- md_template .rename (columns = RENAME_COLS_DICT , inplace = True )
211
-
212
- # Check for all required columns for target genes studies
213
- missing_cols = REQUIRED_TARGET_GENE_COLS .difference (
214
- md_template .columns )
215
-
216
- return missing_cols
217
-
218
196
@classmethod
219
197
def delete (cls , id_ ):
220
198
r"""Deletes the table from the database
@@ -412,17 +390,11 @@ def generate_files(self):
412
390
self .add_filepath (fp )
413
391
414
392
# creating QIIME mapping file
415
- self .create_qiime_mapping_file (fp )
393
+ self .create_qiime_mapping_file ()
416
394
417
- def create_qiime_mapping_file (self , prep_template_fp ):
395
+ def create_qiime_mapping_file (self ):
418
396
"""This creates the QIIME mapping file and links it in the db.
419
397
420
- Parameters
421
- ----------
422
- prep_template_fp : str
423
- The prep template filepath that should be concatenated to the
424
- sample template go used to generate a new QIIME mapping file
425
-
426
398
Returns
427
399
-------
428
400
filepath : str
@@ -432,12 +404,20 @@ def create_qiime_mapping_file(self, prep_template_fp):
432
404
------
433
405
ValueError
434
406
If the prep template is not a subset of the sample template
407
+ QiitaDBWarning
408
+ If the QIIME-required columns are not present in the template
409
+
410
+ Notes
411
+ -----
412
+ We cannot ensure that the QIIME-required columns are present in the
413
+ metadata map. However, we have to generate a QIIME-compliant mapping
414
+ file. Since the user may need a QIIME mapping file, but not these
415
+ QIIME-required columns, we are going to create them and
416
+ populate them with the value XXQIITAXX.
435
417
"""
436
418
rename_cols = {
437
419
'barcode' : 'BarcodeSequence' ,
438
- 'barcodesequence' : 'BarcodeSequence' ,
439
420
'primer' : 'LinkerPrimerSequence' ,
440
- 'linkerprimersequence' : 'LinkerPrimerSequence' ,
441
421
'description' : 'Description' ,
442
422
}
443
423
@@ -456,19 +436,38 @@ def create_qiime_mapping_file(self, prep_template_fp):
456
436
457
437
# reading files via pandas
458
438
st = load_template_to_dataframe (sample_template_fp )
459
- pt = load_template_to_dataframe (prep_template_fp )
439
+ pt = self .to_dataframe ()
440
+
460
441
st_sample_names = set (st .index )
461
442
pt_sample_names = set (pt .index )
462
443
463
444
if not pt_sample_names .issubset (st_sample_names ):
464
445
raise ValueError (
465
- "Prep template is not a sub set of the sample template, files:"
466
- "%s %s - samples: %s" % (sample_template_fp , prep_template_fp ,
467
- str (pt_sample_names - st_sample_names )))
446
+ "Prep template is not a sub set of the sample template, files"
447
+ "%s - samples: %s"
448
+ % (sample_template_fp ,
449
+ ', ' .join (pt_sample_names - st_sample_names )))
468
450
469
451
mapping = pt .join (st , lsuffix = "_prep" )
470
452
mapping .rename (columns = rename_cols , inplace = True )
471
453
454
+ # Pre-populate the QIIME-required columns with the value XXQIITAXX
455
+ index = mapping .index
456
+ placeholder = ['XXQIITAXX' ] * len (index )
457
+ missing = []
458
+ for val in viewvalues (rename_cols ):
459
+ if val not in mapping :
460
+ missing .append (val )
461
+ mapping [val ] = pd .Series (placeholder , index = index )
462
+
463
+ if missing :
464
+ warnings .warn (
465
+ "Some columns required to generate a QIIME-compliant mapping "
466
+ "file are not present in the template. A placeholder value "
467
+ "(XXQIITAXX) has been used to populate these columns. Missing "
468
+ "columns: %s" % ', ' .join (missing ),
469
+ QiitaDBWarning )
470
+
472
471
# Gets the original mapping columns and readjusts the order to comply
473
472
# with QIIME requirements
474
473
cols = mapping .columns .values .tolist ()
@@ -486,11 +485,13 @@ def create_qiime_mapping_file(self, prep_template_fp):
486
485
self .id , strftime ("%Y%m%d-%H%M%S" )))
487
486
488
487
# Save the mapping file
489
- mapping .to_csv (filepath , index_label = '#SampleID' , na_rep = 'unknown ' ,
488
+ mapping .to_csv (filepath , index_label = '#SampleID' , na_rep = '' ,
490
489
sep = '\t ' )
491
490
492
491
# adding the fp to the object
493
- self .add_filepath (filepath )
492
+ self .add_filepath (
493
+ filepath , conn_handler = conn_handler ,
494
+ fp_id = convert_to_id ("qiime_map" , "filepath_type" ))
494
495
495
496
return filepath
496
497
0 commit comments