Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 95 additions & 4 deletions qiita_db/metadata_template/base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
from qiita_core.exceptions import IncompetentQiitaDeveloperError
import qiita_db as qdb

from string import letters, digits


class BaseSample(qdb.base.QiitaObject):
r"""Sample object that accesses the db to get the information of a sample
Expand Down Expand Up @@ -434,6 +436,9 @@ class MetadataTemplate(qdb.base.QiitaObject):
_table_prefix = None
_id_column = None
_sample_cls = None
# forbidden_words not defined for base class. Please redefine for
# sub-classes.
_forbidden_words = {}

def _check_id(self, id_):
r"""Checks that the MetadataTemplate id_ exists on the database"""
Expand Down Expand Up @@ -484,12 +489,15 @@ def _clean_validate_template(cls, md_template, study_id,
Returns
-------
md_template : DataFrame
Cleaned copy of the input md_template
Cleaned deep-copy of the input md_template:
Removes 'qiita_study_id' and 'qiita_prep_id' columns,
if present.

Raises
------
QiitaDBColumnError
If the sample names in md_template contains invalid names
If the column names in md_template contain invalid characters,
forbidden words, or PostgreSQL-reserved words.
QiitaDBWarning
If there are missing columns required for some functionality
"""
Expand All @@ -513,7 +521,7 @@ def _clean_validate_template(cls, md_template, study_id,
# In the database, all the column headers are lowercase
md_template.columns = [c.lower() for c in md_template.columns]

# Droping/Ignoring internal generated colums
# drop these columns in the result
if 'qiita_study_id' in md_template.columns:
del md_template['qiita_study_id']
if 'qiita_prep_id' in md_template.columns:
Expand All @@ -522,9 +530,34 @@ def _clean_validate_template(cls, md_template, study_id,
# validating pgsql reserved words not to be column headers
current_headers = set(md_template.columns.values)

qdb.metadata_template.util.validate_invalid_column_names(
# testing for specific column names that are not included in the other
# tests.

pgsql_reserved = cls._identify_pgsql_reserved_words_in_column_names(
current_headers)
invalid = cls._identify_column_names_with_invalid_characters(
current_headers)
forbidden = cls._identify_forbidden_words_in_column_names(
current_headers)

error = []
if pgsql_reserved:
error.append(
"The following column names in the template contain PgSQL "
"reserved words: %s." % ", ".join(pgsql_reserved))
if invalid:
error.append(
"The following column names in the template contain invalid "
"chars: %s." % ", ".join(invalid))
if forbidden:
error.append(
"The following column names in the template contain invalid "
"values: %s." % ", ".join(forbidden))

if error:
raise qdb.exceptions.QiitaDBColumnError(
"%s\nYou need to modify them." % '\n'.join(error))

# Prefix the sample names with the study_id
qdb.metadata_template.util.prefix_sample_names_with_id(md_template,
study_id)
Expand Down Expand Up @@ -1533,3 +1566,61 @@ def validate(self, restriction_dict):
"columns:\n\t%s.\nSee the Templates tutorial for a description"
" of these fields." % ";\n\t".join(warning_msg),
qdb.exceptions.QiitaDBWarning)

@classmethod
def _identify_forbidden_words_in_column_names(cls, column_names):
"""Return a list of forbidden words found in column_names.

Parameters
----------
column_names : iterable
Iterable containing the column names to check.

Returns
------
set of forbidden words present in the column_names iterable.
"""
return set(cls._forbidden_words) & set(column_names)

@classmethod
def _identify_pgsql_reserved_words_in_column_names(cls, column_names):
    """Find the column names that are PostgreSQL-reserved words.

    Parameters
    ----------
    column_names : iterable
        Iterable containing the column names to check.

    Returns
    -------
    set
        The reserved words (as reported by
        ``qdb.metadata_template.util.get_pgsql_reserved_words``) that are
        present in column_names.

    References
    ----------
    .. [1] postgresql SQL-SYNTAX-IDENTIFIERS: https://goo.gl/EF0cUV.
    """
    reserved_words = qdb.metadata_template.util.get_pgsql_reserved_words()
    return reserved_words & set(column_names)

@classmethod
def _identify_column_names_with_invalid_characters(cls, column_names):
"""Return a list of invalid words found in column_names.

Parameters
----------
column_names : iterable
Iterable containing the column names to check.

Returns
------
set of words containing invalid (illegal) characters.
"""
valid_initial_char = letters
valid_rest = set(letters+digits+'_')
invalid = []
for s in column_names:
if s[0] not in valid_initial_char:
invalid.append(s)
elif set(s) - valid_rest:
invalid.append(s)
return set(invalid)
4 changes: 4 additions & 0 deletions qiita_db/metadata_template/prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ class PrepTemplate(MetadataTemplate):
_id_column = "prep_template_id"
_sample_cls = PrepSample
_filepath_table = 'prep_template_filepath'
_forbidden_words = {
'sampleid',
'qiita_study_id',
'qiita_prep_id'}

@classmethod
def create(cls, md_template, study, data_type, investigation_type=None,
Expand Down
12 changes: 11 additions & 1 deletion qiita_db/metadata_template/sample_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,16 @@ class SampleTemplate(MetadataTemplate):
_id_column = "study_id"
_sample_cls = Sample
_filepath_table = 'sample_template_filepath'
_forbidden_words = {
'barcodesequence',
'linkerprimersequence',
'barcode',
'linker',
'primer',
'run_prefix',
'sampleid',
'qiita_study_id',
'qiita_prep_id'}

@classmethod
def create(cls, md_template, study):
Expand Down Expand Up @@ -153,7 +163,7 @@ def study_id(self):

@property
def columns_restrictions(self):
"""Gets the dictionary of colums required
"""Gets the dictionary of columns required

Returns
-------
Expand Down
27 changes: 27 additions & 0 deletions qiita_db/metadata_template/test/test_base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,33 @@ def test_clean_validate_template(self):
with self.assertRaises(IncompetentQiitaDeveloperError):
MT._clean_validate_template(None, 1)

def test_identify_pgsql_reserved_words(self):
    MT = qdb.metadata_template.base_metadata_template.MetadataTemplate
    # Two reserved words mixed with one ordinary column name.
    column_names = ['select', 'column', 'just_fine1']
    obs = MT._identify_pgsql_reserved_words_in_column_names(column_names)
    # assertEqual reports the differing members on failure, which
    # assertTrue(a == b) cannot do.
    self.assertEqual(set(obs), {'column', 'select'})

def test_identify_invalid_characters(self):
    MT = qdb.metadata_template.base_metadata_template.MetadataTemplate
    # A mix of names with spaces, punctuation or a leading digit (all
    # invalid) and plain letter/digit/underscore names (valid).
    column_names = [
        'tax on',
        'bla.',
        '.',
        'sampleid',
        'sample_id',
        '{',
        'this|is',
        '4column',
        'just_fine2']
    exp = {'tax on', 'bla.', '.', '{', 'this|is', '4column'}
    obs = MT._identify_column_names_with_invalid_characters(column_names)
    # assertEqual reports the differing members on failure, which
    # assertTrue(a == b) cannot do.
    self.assertEqual(set(obs), exp)


# Run the test suite when this module is executed as a script.
if __name__ == '__main__':
    main()
35 changes: 35 additions & 0 deletions qiita_db/metadata_template/test/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,41 @@ def test_clean_validate_template(self):
exp.sort_index(axis=1, inplace=True)
assert_frame_equal(obs, exp)

def test_clean_validate_template_no_forbidden_words1(self):
    # 'sampleid' is listed in PrepTemplate._forbidden_words, so a column
    # with that name must be rejected.
    PT = qdb.metadata_template.prep_template.PrepTemplate
    renaming = {'center_name': 'sampleid'}
    self.metadata.rename(columns=renaming, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      PT._clean_validate_template, self.metadata, 2)

def test_clean_validate_template_no_forbidden_words4(self):
    # 'linkerprimersequence' is not listed in
    # PrepTemplate._forbidden_words, so validation must not raise a
    # column error for it.
    PT = qdb.metadata_template.prep_template.PrepTemplate
    renaming = {'center_name': 'linkerprimersequence'}
    self.metadata.rename(columns=renaming, inplace=True)
    try:
        PT._clean_validate_template(self.metadata, 2)
    except qdb.exceptions.QiitaDBColumnError:
        column_error_raised = True
    else:
        column_error_raised = False
    self.assertFalse(column_error_raised, "Exception raised")

def test_clean_validate_template_no_pgsql_reserved_words(self):
    # A column named after a PostgreSQL-reserved word must be rejected.
    PT = qdb.metadata_template.prep_template.PrepTemplate
    renaming = {'center_name': 'select'}
    self.metadata.rename(columns=renaming, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      PT._clean_validate_template, self.metadata, 2)

def test_clean_validate_template_no_invalid_chars(self):
    # A column name containing a space must be rejected.
    PT = qdb.metadata_template.prep_template.PrepTemplate
    renaming = {'center_name': 'taxon id'}
    self.metadata.rename(columns=renaming, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      PT._clean_validate_template, self.metadata, 2)

def test_clean_validate_template_no_invalid_chars2(self):
    # A column name containing a period must be rejected.
    PT = qdb.metadata_template.prep_template.PrepTemplate
    renaming = {'center_name': 'bla.'}
    self.metadata.rename(columns=renaming, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      PT._clean_validate_template, self.metadata, 2)

def test_get_category(self):
pt = qdb.metadata_template.prep_template.PrepTemplate(1)
obs = pt.get_category('primer')
Expand Down
84 changes: 76 additions & 8 deletions qiita_db/metadata_template/test/test_sample_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
import qiita_db as qdb


STC = qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS


@qiita_test_checker()
class TestSample(TestCase):
def setUp(self):
Expand Down Expand Up @@ -201,10 +204,10 @@ def test_get_none(self):
self.assertTrue(self.tester.get('Not_a_Category') is None)

def test_columns_restrictions(self):
"""that it returns SAMPLE_TEMPLATE_COLUMNS"""
"""that it returns STC"""
self.assertEqual(
self.sample_template.columns_restrictions,
qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS)
STC)

def test_can_be_updated(self):
"""test if the template can be updated"""
Expand Down Expand Up @@ -744,8 +747,9 @@ def test_clean_validate_template_columns(self):
dtype=str)
ST = qdb.metadata_template.sample_template.SampleTemplate
obs = ST._clean_validate_template(
metadata, 2,
qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS)
metadata,
2,
current_columns=STC)
metadata_dict = {
'2.Sample1': {'physical_specimen_location': 'location1',
'physical_specimen_remaining': 'true',
Expand All @@ -766,8 +770,9 @@ def test_clean_validate_template_columns(self):
def test_clean_validate_template(self):
ST = qdb.metadata_template.sample_template.SampleTemplate
obs = ST._clean_validate_template(
self.metadata, 2,
qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS)
self.metadata,
2,
current_columns=STC)
metadata_dict = {
'2.Sample1': {'physical_specimen_location': 'location1',
'physical_specimen_remaining': 'true',
Expand Down Expand Up @@ -825,6 +830,69 @@ def test_clean_validate_template_no_invalid_chars(self):
with self.assertRaises(qdb.exceptions.QiitaDBColumnError):
ST._clean_validate_template(self.metadata, 2)

def test_clean_validate_template_no_invalid_chars2(self):
    # A column name containing a period must be rejected.
    ST = qdb.metadata_template.sample_template.SampleTemplate
    renaming = {'taxon_id': 'bla.'}
    self.metadata.rename(columns=renaming, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      ST._clean_validate_template, self.metadata, 2)

def test_clean_validate_template_no_invalid_chars3(self):
    # A column name containing a pipe character must be rejected.
    ST = qdb.metadata_template.sample_template.SampleTemplate
    renaming = {'taxon_id': 'this|is'}
    self.metadata.rename(columns=renaming, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      ST._clean_validate_template, self.metadata, 2)

def test_clean_validate_template_no_forbidden_words(self):
    # 'sampleid' is listed in SampleTemplate._forbidden_words, so a
    # column with that name must be rejected.
    ST = qdb.metadata_template.sample_template.SampleTemplate
    renaming = {'taxon_id': 'sampleid'}
    self.metadata.rename(columns=renaming, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      ST._clean_validate_template, self.metadata, 2)

def test_clean_validate_template_no_forbidden_words2(self):
    # 'linkerprimersequence' is a word forbidden only in SampleTemplate.
    ST = qdb.metadata_template.sample_template.SampleTemplate
    renaming = {'taxon_id': 'linkerprimersequence'}
    self.metadata.rename(columns=renaming, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      ST._clean_validate_template, self.metadata, 2)

def test_clean_validate_template_no_forbidden_words3(self):
    # 'barcode' is a word forbidden only in SampleTemplate.
    ST = qdb.metadata_template.sample_template.SampleTemplate
    renaming = {'taxon_id': 'barcode'}
    self.metadata.rename(columns=renaming, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      ST._clean_validate_template, self.metadata, 2)

# this test migrated to SampleTemplate, from MetadataTemplate, to test
# _identify_forbidden_words_in_column_names() with a usable list of
# forbidden words.
def test_identify_forbidden_words_in_column_names(self):
    ST = qdb.metadata_template.sample_template.SampleTemplate
    # Forbidden words are interleaved with ordinary names so that
    # detection is checked for words that are not the first element.
    column_names = [
        'just_fine3',
        'sampleid',
        'alice',
        'linkerprimersequence',
        'bob',
        'qiita_study_id',
        'qiita_prep_id',
        'eve']
    # Every SampleTemplate forbidden word present in the input must be
    # returned, including 'linkerprimersequence'; ordinary column names
    # must not be returned.
    exp = {'qiita_prep_id',
           'qiita_study_id',
           'linkerprimersequence',
           'sampleid'}
    obs = ST._identify_forbidden_words_in_column_names(column_names)
    self.assertEqual(set(obs), exp)

def test_silent_drop(self):
    ST = qdb.metadata_template.sample_template.SampleTemplate
    # Add the internally generated columns to the input so the test
    # actually exercises the drop; without them the assertions below
    # would pass even if nothing were removed.
    self.metadata['qiita_study_id'] = '2'
    self.metadata['qiita_prep_id'] = '1'
    obs = ST._clean_validate_template(self.metadata, 2)
    obs_columns = obs.columns.tolist()
    # The original assertion looked for the misspelled 'qiitq_prep_id',
    # which can never be present.
    self.assertNotIn('qiita_study_id', obs_columns)
    self.assertNotIn('qiita_prep_id', obs_columns)

def test_get_category(self):
pt = qdb.metadata_template.sample_template.SampleTemplate(1)
obs = pt.get_category('latitude')
Expand Down Expand Up @@ -1886,7 +1954,7 @@ def test_to_dataframe(self):

def test_check_restrictions(self):
obs = self.tester.check_restrictions(
[qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS['EBI']])
[STC['EBI']])
self.assertEqual(obs, set([]))

def test_ebi_sample_accessions(self):
Expand Down Expand Up @@ -2049,7 +2117,7 @@ def test_validate_template_warning_missing_restrictions(self):
qdb.metadata_template.sample_template.SampleTemplate.create,
self.metadata, self.new_study)
obs = st.check_restrictions(
[qdb.metadata_template.constants.SAMPLE_TEMPLATE_COLUMNS['EBI']])
[STC['EBI']])
self.assertEqual(obs, {'collection_timestamp'})

def test_validate_errors(self):
Expand Down
Loading