Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 101 additions & 3 deletions qiita_db/metadata_template/base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
# -----------------------------------------------------------------------------

from __future__ import division
from future.utils import viewitems
from future.utils import PY3, viewitems
from future.builtins import zip
from itertools import chain
from copy import deepcopy
Expand All @@ -50,6 +50,11 @@
from qiita_core.exceptions import IncompetentQiitaDeveloperError
import qiita_db as qdb

if PY3:
from string import ascii_letters as letters, digits
else:
from string import letters, digits


class BaseSample(qdb.base.QiitaObject):
r"""Sample object that accesses the db to get the information of a sample
Expand Down Expand Up @@ -434,6 +439,11 @@ class MetadataTemplate(qdb.base.QiitaObject):
_table_prefix = None
_id_column = None
_sample_cls = None
_forbidden_words = {
'sampleid',
'qiita_study_id',
'qiita_prep_id'
}

def _check_id(self, id_):
r"""Checks that the MetadataTemplate id_ exists on the database"""
Expand Down Expand Up @@ -513,7 +523,12 @@ def _clean_validate_template(cls, md_template, study_id,
# In the database, all the column headers are lowercase
md_template.columns = [c.lower() for c in md_template.columns]

# Droping/Ignoring internal generated colums
# Dropping/ignoring internally generated columns:
# A legacy feature of _clean_validate_template() is that it silently
# removes these columns before validating. It would be better to make
# this optional. These two keywords are also in forbidden words lists.
# Hence, it's possible to write unit test code that provides these
# words to this method and yet fails to raise an Error.
if 'qiita_study_id' in md_template.columns:
del md_template['qiita_study_id']
if 'qiita_prep_id' in md_template.columns:
Expand All @@ -522,9 +537,34 @@ def _clean_validate_template(cls, md_template, study_id,
# validating pgsql reserved words not to be column headers
current_headers = set(md_template.columns.values)

qdb.metadata_template.util.validate_invalid_column_names(
# testing for specific column names that are not included in the other
# tests.

pgsql_reserved = cls._identify_pgsql_reserved_words_in_column_names(
current_headers)
invalid = cls._identify_column_names_with_invalid_characters(
current_headers)
forbidden = cls._identify_forbidden_words_in_column_names(
current_headers)

error = []
if pgsql_reserved:
error.append(
"The following column names in the template contain PgSQL "
"reserved words: %s." % ", ".join(pgsql_reserved))
if invalid:
error.append(
"The following column names in the template contain invalid "
"chars: %s." % ", ".join(invalid))
if forbidden:
error.append(
"The following column names in the template contain invalid "
"values: %s." % ", ".join(forbidden))

if error:
raise qdb.exceptions.QiitaDBColumnError(
"%s\nYou need to modify them." % '\n'.join(error))

# Prefix the sample names with the study_id
qdb.metadata_template.util.prefix_sample_names_with_id(md_template,
study_id)
Expand Down Expand Up @@ -1533,3 +1573,61 @@ def validate(self, restriction_dict):
"columns:\n\t%s.\nSee the Templates tutorial for a description"
" of these fields." % ";\n\t".join(warning_msg),
qdb.exceptions.QiitaDBWarning)

@classmethod
def _identify_forbidden_words_in_column_names(cls, column_names):
"""Return a list of forbidden words found in column_names.

Parameters
----------
column_names : iterable
Iterable containing the column names to check.

Returns
------
set of forbidden words present in the column_names iterable.
"""
return set(cls._forbidden_words) & set(column_names)

@classmethod
def _identify_pgsql_reserved_words_in_column_names(cls, column_names):
    """Return the PostgreSQL-reserved words that appear in column_names.

    Parameters
    ----------
    column_names : iterable
        Iterable containing the column names to check.

    Returns
    -------
    set
        The reserved words present in the column_names iterable.

    References
    ----------
    .. [1] postgresql SQL-SYNTAX-IDENTIFIERS: https://goo.gl/EF0cUV.
    """
    # The reserved-word collection is supplied by the util module; it is
    # intersected with the candidate names using the same ``&`` operator.
    reserved_words = qdb.metadata_template.util.get_pgsql_reserved_words()
    candidates = set(column_names)
    return reserved_words & candidates

@classmethod
def _identify_column_names_with_invalid_characters(cls, column_names):
"""Return a list of invalid words found in column_names.

Parameters
----------
column_names : iterable
Iterable containing the column names to check.

Returns
------
set of words containing invalid (illegal) characters.
"""
valid_initial_char = letters
valid_rest = set(letters+digits+'_')
invalid = []
for s in column_names:
if s[0] not in valid_initial_char:
invalid.append(s)
elif set(s) - valid_rest:
invalid.append(s)
return set(invalid)
7 changes: 7 additions & 0 deletions qiita_db/metadata_template/prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ class PrepTemplate(MetadataTemplate):
_id_column = "prep_template_id"
_sample_cls = PrepSample
_filepath_table = 'prep_template_filepath'
# This has been explicitly redefined here for clarity, even though
# PrepTemplate does not differ at this point from the base MetadataTemplate
# class.
_forbidden_words = {
'sampleid',
'qiita_study_id',
'qiita_prep_id'}

@classmethod
def create(cls, md_template, study, data_type, investigation_type=None,
Expand Down
12 changes: 11 additions & 1 deletion qiita_db/metadata_template/sample_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,16 @@ class SampleTemplate(MetadataTemplate):
_id_column = "study_id"
_sample_cls = Sample
_filepath_table = 'sample_template_filepath'
_forbidden_words = {
'barcodesequence',
'linkerprimersequence',
'barcode',
'linker',
'primer',
'run_prefix',
'sampleid',
'qiita_study_id',
'qiita_prep_id'}

@classmethod
def create(cls, md_template, study):
Expand Down Expand Up @@ -153,7 +163,7 @@ def study_id(self):

@property
def columns_restrictions(self):
"""Gets the dictionary of colums required
"""Gets the dictionary of columns required

Returns
-------
Expand Down
54 changes: 54 additions & 0 deletions qiita_db/metadata_template/test/test_base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,60 @@ def test_clean_validate_template(self):
with self.assertRaises(IncompetentQiitaDeveloperError):
MT._clean_validate_template(None, 1)

def test_identify_forbidden_words(self):
    """_identify_forbidden_words_in_column_names returns forbidden words
    """
    MT = qdb.metadata_template.base_metadata_template.MetadataTemplate
    # tests filtering for sample_id, when it is not the first element
    # verifies all forbidden elements for base class are returned
    # verifies a forbidden word in sub-class will not be returned
    # verifies normal column names are not returned
    results = MT._identify_forbidden_words_in_column_names([
        'just_fine3',
        'sampleid',
        'alice',
        'linkerprimersequence',
        'bob',
        'qiita_study_id',
        'qiita_prep_id',
        'eve'])
    # assertEqual is required here: assertTrue(x, y) treats y as the failure
    # message and would pass for any non-empty result.
    self.assertEqual(set(results), {'qiita_prep_id',
                                    'qiita_study_id',
                                    'sampleid'})

def test_identify_pgsql_reserved_words(self):
    """_identify_pgsql_reserved_words_in_column_names returns words
    matching the database's current list of reserved words.
    """
    MT = qdb.metadata_template.base_metadata_template.MetadataTemplate
    results = MT._identify_pgsql_reserved_words_in_column_names([
        'select',
        'column',
        'just_fine1'])
    # assertEqual is required here: assertTrue(x, y) treats y as the failure
    # message and would pass for any non-empty result.
    self.assertEqual(set(results), {'column', 'select'})

def test_identify_invalid_characters(self):
    """_identify_column_names_with_invalid_characters returns words
    containing invalid characters.
    """
    MT = qdb.metadata_template.base_metadata_template.MetadataTemplate
    results = MT._identify_column_names_with_invalid_characters([
        'tax on',
        'bla.',
        '.',
        'sampleid',
        'sample_id',
        '{',
        'this|is',
        '4column',
        'just_fine2'])
    # assertEqual reports the differing elements on failure, whereas
    # assertTrue(a == b) only reports "False is not true".
    self.assertEqual(set(results), {'tax on',
                                    'bla.',
                                    '.',
                                    '{',
                                    'this|is',
                                    '4column'})


if __name__ == '__main__':
main()
59 changes: 59 additions & 0 deletions qiita_db/metadata_template/test/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,65 @@ def test_clean_validate_template(self):
exp.sort_index(axis=1, inplace=True)
assert_frame_equal(obs, exp)

def test_clean_validate_template_no_forbidden_words1(self):
    """A column named 'sampleid' makes validation raise a column error"""
    template_cls = qdb.metadata_template.prep_template.PrepTemplate
    # Rename an existing column to a word forbidden for prep templates.
    self.metadata.rename(columns={'center_name': 'sampleid'}, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      template_cls._clean_validate_template,
                      self.metadata, 2)

'''
Commenting these two out for now. There is legacy code in
_clean_validate_template that automatically removes internally generated
columns qiita_study_id and qiita_prep_id before validating. Need to
revisit this and see how existing code might break when removing the
'remover'.

def test_clean_validate_template_no_forbidden_words2(self):
PT = qdb.metadata_template.prep_template.PrepTemplate
self.metadata.rename(columns={'center_name': 'qiita_study_id'},
inplace=True)
with self.assertRaises(qdb.exceptions.QiitaDBColumnError):
PT._clean_validate_template(self.metadata, 2)

def test_clean_validate_template_no_forbidden_words3(self):
PT = qdb.metadata_template.prep_template.PrepTemplate
self.metadata.rename(columns={'center_name': 'qiita_prep_id'},
inplace=True)
with self.assertRaises(qdb.exceptions.QiitaDBColumnError):
PT._clean_validate_template(self.metadata, 2)
'''

def test_clean_validate_template_no_forbidden_words4(self):
    """A word forbidden only for sample templates is accepted here"""
    template_cls = qdb.metadata_template.prep_template.PrepTemplate
    # 'linkerprimersequence' is forbidden for SampleTemplate but not for
    # PrepTemplate, so validation must not raise a column error.
    self.metadata.rename(columns={'center_name': 'linkerprimersequence'},
                         inplace=True)
    try:
        template_cls._clean_validate_template(self.metadata, 2)
    except qdb.exceptions.QiitaDBColumnError:
        column_error_raised = True
    else:
        column_error_raised = False

    self.assertFalse(column_error_raised, "Exception raised")

def test_clean_validate_template_no_pgsql_reserved_words(self):
    """A column named 'select' makes validation raise a column error"""
    template_cls = qdb.metadata_template.prep_template.PrepTemplate
    # Rename an existing column to the word 'select'.
    self.metadata.rename(columns={'center_name': 'select'}, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      template_cls._clean_validate_template,
                      self.metadata, 2)

def test_clean_validate_template_no_invalid_chars(self):
    """A column name containing a space makes validation raise an error"""
    template_cls = qdb.metadata_template.prep_template.PrepTemplate
    # Rename an existing column to a name with an embedded space.
    self.metadata.rename(columns={'center_name': 'taxon id'}, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      template_cls._clean_validate_template,
                      self.metadata, 2)

def test_clean_validate_template_no_invalid_chars2(self):
    """A column name ending in '.' makes validation raise an error"""
    template_cls = qdb.metadata_template.prep_template.PrepTemplate
    # Rename an existing column to a name with a trailing period.
    self.metadata.rename(columns={'center_name': 'bla.'}, inplace=True)
    self.assertRaises(qdb.exceptions.QiitaDBColumnError,
                      template_cls._clean_validate_template,
                      self.metadata, 2)

def test_get_category(self):
pt = qdb.metadata_template.prep_template.PrepTemplate(1)
obs = pt.get_category('primer')
Expand Down
Loading