Skip to content

Commit f392de5

Browse files
committed
Fixing merge conflicts
2 parents d69be3b + 3aa8f73 commit f392de5

File tree

6 files changed

+272
-206
lines changed

6 files changed

+272
-206
lines changed

qiita_db/metadata_template/base_metadata_template.py

Lines changed: 86 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,21 +40,25 @@
4040
from os.path import join
4141
from functools import partial
4242
from collections import defaultdict
43+
from copy import deepcopy
4344

4445
import pandas as pd
46+
from skbio.util import find_duplicates
4547

4648
from qiita_core.exceptions import IncompetentQiitaDeveloperError
47-
from qiita_db.exceptions import (QiitaDBUnknownIDError,
49+
50+
from qiita_db.exceptions import (QiitaDBUnknownIDError, QiitaDBColumnError,
4851
QiitaDBNotImplementedError,
49-
QiitaDBColumnError,
50-
QiitaDBExecutionError)
52+
QiitaDBExecutionError,
53+
QiitaDBDuplicateHeaderError)
5154
from qiita_db.base import QiitaObject
5255
from qiita_db.sql_connection import SQLConnectionHandler
5356
from qiita_db.util import (exists_table, get_table_cols,
5457
convert_to_id,
5558
get_mountpoint, insert_filepaths)
5659
from qiita_db.logger import LogEntry
57-
from .util import as_python_types, get_datatypes
60+
from .util import (as_python_types, get_datatypes, get_invalid_sample_names,
61+
prefix_sample_names_with_id)
5862

5963

6064
class BaseSample(QiitaObject):
@@ -599,9 +603,8 @@ def _check_special_columns(cls, md_template, obj):
599603
----------
600604
md_template : DataFrame
601605
The metadata template file contents indexed by sample ids
602-
obj : Study or RawData
603-
The obj to which the metadata template belongs to. Study in case
604-
of SampleTemplate and RawData in case of PrepTemplate
606+
obj : object
607+
Any extra object needed by the template to perform any extra check
605608
"""
606609
# Check required columns
607610
missing = set(cls.translate_cols_dict.values()).difference(md_template)
@@ -617,6 +620,82 @@ def _check_special_columns(cls, md_template, obj):
617620
return missing.union(
618621
cls._check_template_special_columns(md_template, obj))
619622

623+
@classmethod
624+
def _clean_validate_template(cls, md_template, study_id, obj,
625+
conn_handler=None):
626+
"""Takes care of all validation and cleaning of metadata templates
627+
628+
Parameters
629+
----------
630+
md_template : DataFrame
631+
The metadata template file contents indexed by sample ids
632+
study_id : int
633+
The study to which the metadata template belongs to.
634+
obj : object
635+
Any extra object needed by the template to perform any extra check
636+
637+
Returns
638+
-------
639+
md_template : DataFrame
640+
Cleaned copy of the input md_template
641+
642+
Raises
643+
------
644+
QiitaDBColumnError
645+
If the sample names in md_template contains invalid names
646+
QiitaDBDuplicateHeaderError
647+
If md_template contains duplicate headers
648+
QiitaDBColumnError
649+
If md_template is missing a required column
650+
"""
651+
cls._check_subclass()
652+
invalid_ids = get_invalid_sample_names(md_template.index)
653+
if invalid_ids:
654+
raise QiitaDBColumnError("The following sample names in the "
655+
"template contain invalid characters "
656+
"(only alphanumeric characters or periods"
657+
" are allowed): %s." %
658+
", ".join(invalid_ids))
659+
# We are going to modify the md_template. We create a copy so
660+
# we don't modify the user one
661+
md_template = deepcopy(md_template)
662+
663+
# Prefix the sample names with the study_id
664+
prefix_sample_names_with_id(md_template, study_id)
665+
666+
# In the database, all the column headers are lowercase
667+
md_template.columns = [c.lower() for c in md_template.columns]
668+
669+
# Check that we don't have duplicate columns
670+
if len(set(md_template.columns)) != len(md_template.columns):
671+
raise QiitaDBDuplicateHeaderError(
672+
find_duplicates(md_template.columns))
673+
674+
# We need to check for some special columns, that are not present on
675+
# the database, but depending on the data type are required.
676+
missing = cls._check_special_columns(md_template, obj)
677+
678+
conn_handler = conn_handler if conn_handler else SQLConnectionHandler()
679+
680+
# Get the required columns from the DB
681+
db_cols = get_table_cols(cls._table, conn_handler)
682+
683+
# Remove the sample_id and study_id columns
684+
db_cols.remove('sample_id')
685+
db_cols.remove(cls._id_column)
686+
687+
# Retrieve the headers of the metadata template
688+
headers = list(md_template.keys())
689+
690+
# Check that md_template has the required columns
691+
remaining = set(db_cols).difference(headers)
692+
missing = missing.union(remaining)
693+
missing = missing.difference(cls.translate_cols_dict)
694+
if missing:
695+
raise QiitaDBColumnError("Missing columns: %s"
696+
% ', '.join(missing))
697+
return md_template
698+
620699
@classmethod
621700
def _add_common_creation_steps_to_queue(cls, md_template, obj_id,
622701
conn_handler, queue_name):

qiita_db/metadata_template/prep_template.py

Lines changed: 5 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,18 @@
77
# -----------------------------------------------------------------------------
88

99
from __future__ import division
10-
from copy import deepcopy
1110
from os.path import join
1211
from time import strftime
1312

14-
from skbio.util import find_duplicates
15-
1613
from qiita_core.exceptions import IncompetentQiitaDeveloperError
1714
from qiita_db.exceptions import (QiitaDBColumnError, QiitaDBUnknownIDError,
18-
QiitaDBDuplicateHeaderError, QiitaDBError,
19-
QiitaDBExecutionError)
15+
QiitaDBError, QiitaDBExecutionError)
2016
from qiita_db.sql_connection import SQLConnectionHandler
2117
from qiita_db.ontology import Ontology
22-
from qiita_db.util import (get_table_cols, get_emp_status, convert_to_id,
18+
from qiita_db.util import (get_emp_status, convert_to_id,
2319
convert_from_id, get_mountpoint, infer_status)
2420
from .base_metadata_template import BaseSample, MetadataTemplate
25-
from .util import (get_invalid_sample_names, prefix_sample_names_with_id,
26-
load_template_to_dataframe)
21+
from .util import load_template_to_dataframe
2722
from .constants import (TARGET_GENE_DATA_TYPES, RENAME_COLS_DICT,
2823
REQUIRED_TARGET_GENE_COLS)
2924

@@ -109,29 +104,6 @@ def create(cls, md_template, raw_data, study, data_type,
109104
if investigation_type is not None:
110105
cls.validate_investigation_type(investigation_type)
111106

112-
invalid_ids = get_invalid_sample_names(md_template.index)
113-
if invalid_ids:
114-
raise QiitaDBColumnError("The following sample names in the prep"
115-
" template contain invalid characters "
116-
"(only alphanumeric characters or periods"
117-
" are allowed): %s." %
118-
", ".join(invalid_ids))
119-
120-
# We are going to modify the md_template. We create a copy so
121-
# we don't modify the user one
122-
md_template = deepcopy(md_template)
123-
124-
# Prefix the sample names with the study_id
125-
prefix_sample_names_with_id(md_template, study.id)
126-
127-
# In the database, all the column headers are lowercase
128-
md_template.columns = [c.lower() for c in md_template.columns]
129-
130-
# Check that we don't have duplicate columns
131-
if len(set(md_template.columns)) != len(md_template.columns):
132-
raise QiitaDBDuplicateHeaderError(
133-
find_duplicates(md_template.columns))
134-
135107
# Get a connection handler
136108
conn_handler = SQLConnectionHandler()
137109
queue_name = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
@@ -146,27 +118,8 @@ def create(cls, md_template, raw_data, study, data_type,
146118
data_type_id = convert_to_id(data_type, "data_type", conn_handler)
147119
data_type_str = data_type
148120

149-
# We need to check for some special columns, that are not present on
150-
# the database, but depending on the data type are required.
151-
missing = cls._check_special_columns(md_template, data_type_str)
152-
153-
# Get the required columns from the DB
154-
db_cols = get_table_cols(cls._table, conn_handler)
155-
156-
# Remove the sample_id and study_id columns
157-
db_cols.remove('sample_id')
158-
db_cols.remove(cls._id_column)
159-
160-
# Retrieve the headers of the metadata template
161-
headers = list(md_template.keys())
162-
163-
# Check that md_template has the required columns
164-
remaining = set(db_cols).difference(headers)
165-
missing = missing.union(remaining)
166-
missing = missing.difference(cls.translate_cols_dict)
167-
if missing:
168-
raise QiitaDBColumnError("Missing columns: %s"
169-
% ', '.join(missing))
121+
md_template = cls._clean_validate_template(md_template, study.id,
122+
data_type_str, conn_handler)
170123

171124
# Insert the metadata template
172125
# We need the prep_id for multiple calls below, which currently is not

qiita_db/metadata_template/sample_template.py

Lines changed: 3 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,23 @@
88

99
from __future__ import division
1010
from future.builtins import zip
11-
from copy import deepcopy
1211
from os.path import join
1312
from time import strftime
1413
from os.path import basename
1514

1615
import pandas as pd
1716
import warnings
18-
from skbio.util import find_duplicates
1917

2018
from qiita_core.exceptions import IncompetentQiitaDeveloperError
21-
from qiita_db.exceptions import (QiitaDBDuplicateError, QiitaDBColumnError,
22-
QiitaDBDuplicateHeaderError, QiitaDBError,
19+
from qiita_db.exceptions import (QiitaDBDuplicateError, QiitaDBError,
2320
QiitaDBWarning)
2421
from qiita_db.sql_connection import SQLConnectionHandler
2522
from qiita_db.util import (get_table_cols, get_required_sample_info_status,
2623
get_mountpoint, scrub_data)
2724
from qiita_db.study import Study
2825
from qiita_db.data import RawData
2926
from .base_metadata_template import BaseSample, MetadataTemplate
30-
from .util import (get_invalid_sample_names, prefix_sample_names_with_id,
31-
as_python_types, get_datatypes)
27+
from .util import as_python_types, get_datatypes
3228
from .prep_template import PrepTemplate
3329

3430

@@ -114,70 +110,6 @@ def _check_template_special_columns(cls, md_template, study_id):
114110
"""
115111
return set()
116112

117-
@classmethod
118-
def _clean_validate_template(cls, md_template, study_id,
119-
conn_handler=None):
120-
"""Takes care of all validation and cleaning of sample templates
121-
122-
Parameters
123-
----------
124-
md_template : DataFrame
125-
The metadata template file contents indexed by sample ids
126-
study_id : int
127-
The study to which the sample template belongs to.
128-
129-
Returns
130-
-------
131-
md_template : DataFrame
132-
Cleaned copy of the input md_template
133-
"""
134-
invalid_ids = get_invalid_sample_names(md_template.index)
135-
if invalid_ids:
136-
raise QiitaDBColumnError("The following sample names in the sample"
137-
" template contain invalid characters "
138-
"(only alphanumeric characters or periods"
139-
" are allowed): %s." %
140-
", ".join(invalid_ids))
141-
# We are going to modify the md_template. We create a copy so
142-
# we don't modify the user one
143-
md_template = deepcopy(md_template)
144-
145-
# Prefix the sample names with the study_id
146-
prefix_sample_names_with_id(md_template, study_id)
147-
148-
# In the database, all the column headers are lowercase
149-
md_template.columns = [c.lower() for c in md_template.columns]
150-
151-
# Check that we don't have duplicate columns
152-
if len(set(md_template.columns)) != len(md_template.columns):
153-
raise QiitaDBDuplicateHeaderError(
154-
find_duplicates(md_template.columns))
155-
156-
# We need to check for some special columns, that are not present on
157-
# the database, but depending on the data type are required.
158-
missing = cls._check_special_columns(md_template, study_id)
159-
160-
conn_handler = conn_handler if conn_handler else SQLConnectionHandler()
161-
162-
# Get the required columns from the DB
163-
db_cols = get_table_cols(cls._table, conn_handler)
164-
165-
# Remove the sample_id and study_id columns
166-
db_cols.remove('sample_id')
167-
db_cols.remove(cls._id_column)
168-
169-
# Retrieve the headers of the metadata template
170-
headers = list(md_template.keys())
171-
172-
# Check that md_template has the required columns
173-
remaining = set(db_cols).difference(headers)
174-
missing = missing.union(remaining)
175-
missing = missing.difference(cls.translate_cols_dict)
176-
if missing:
177-
raise QiitaDBColumnError("Missing columns: %s"
178-
% ', '.join(missing))
179-
return md_template
180-
181113
@classmethod
182114
def create(cls, md_template, study):
183115
r"""Creates the sample template in the database
@@ -201,7 +133,7 @@ def create(cls, md_template, study):
201133

202134
# Clean and validate the metadata template given
203135
md_template = cls._clean_validate_template(md_template, study.id,
204-
conn_handler)
136+
study.id, conn_handler)
205137

206138
cls._add_common_creation_steps_to_queue(md_template, study.id,
207139
conn_handler, queue_name)
@@ -404,37 +336,3 @@ def update(self, md_template):
404336
# word qiime within the name of the file
405337
if '_qiime_' not in basename(fp):
406338
pt.create_qiime_mapping_file(fp)
407-
408-
def add_category(self, category, samples_and_values, dtype, default):
409-
"""Add a metadata category
410-
411-
Parameters
412-
----------
413-
category : str
414-
The category to add
415-
samples_and_values : dict
416-
A mapping of {sample_id: value}
417-
dtype : str
418-
The datatype of the column
419-
default : object
420-
The default value associated with the column. This must be
421-
specified as these columns are added "not null".
422-
423-
Raises
424-
------
425-
QiitaDBDuplicateError
426-
If the column already exists
427-
"""
428-
table_name = self._table_name(self.study_id)
429-
conn_handler = SQLConnectionHandler()
430-
431-
if category in self.categories():
432-
raise QiitaDBDuplicateError(category, "N/A")
433-
434-
conn_handler.execute("""
435-
ALTER TABLE qiita.{0}
436-
ADD COLUMN {1} {2}
437-
NOT NULL DEFAULT '{3}'""".format(table_name, category, dtype,
438-
default))
439-
440-
self.update_category(category, samples_and_values)

qiita_db/metadata_template/test/test_base_metadata_template.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ def test_add_common_creation_steps_to_queue(self):
5858
MetadataTemplate._add_common_creation_steps_to_queue(
5959
None, 1, None, "")
6060

61+
def test_clean_validate_template(self):
62+
"""_clean_validate_template raises an error from base class"""
63+
with self.assertRaises(IncompetentQiitaDeveloperError):
64+
MetadataTemplate._clean_validate_template(None, 1, None, None)
65+
6166

6267
@qiita_test_checker()
6368
class TestMetadataTemplateReadWrite(TestCase):

0 commit comments

Comments
 (0)