Skip to content

Fix _almost_ all qiita db tests #1099

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 28, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions qiita_db/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,9 +369,9 @@ def delete(cls, raw_data_id, study_id):
"""
SELECT EXISTS(
SELECT * FROM qiita.prep_template AS pt
LEFT JOIN qiita.common_prep_info AS cpi ON
LEFT JOIN qiita.prep_template_sample AS cpi ON
(pt.prep_template_id=cpi.prep_template_id)
LEFT JOIN qiita.required_sample_info AS rsi ON
LEFT JOIN qiita.study_sample AS rsi ON
(cpi.sample_id=rsi.sample_id)
WHERE raw_data_id = {0} and study_id = {1}
)
Expand Down
30 changes: 12 additions & 18 deletions qiita_db/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,6 @@ def __repr__(self):


class SearchTerm(object):
# column names from required_sample_info table
required_cols = set(get_table_cols("required_sample_info"))
# column names from study table
study_cols = set(get_table_cols("study"))

Expand All @@ -128,7 +126,7 @@ def __init__(self, tokens):
self.term[pos] = scrub_data(term)

def generate_sql(self):
# we can assume that the metadata is either in required_sample_info
# we can assume that the metadata is either in study_sample
# or the study-specific table
column_name, operator, argument = self.term
argument_type = type(convert_type(argument))
Expand All @@ -140,9 +138,7 @@ def generate_sql(self):
if operator not in allowable_types[argument_type]:
raise QiitaDBIncompatibleDatatypeError(operator, argument_type)

if column_name in self.required_cols:
column_name = "r.%s" % column_name.lower()
elif column_name in self.study_cols:
if column_name in self.study_cols:
column_name = "st.%s" % column_name.lower()
else:
column_name = "sa.%s" % column_name.lower()
Expand All @@ -167,8 +163,6 @@ def __repr__(self):
class QiitaStudySearch(object):
"""QiitaStudySearch object to parse and run searches on studies."""

# column names from required_sample_info table
required_cols = set(get_table_cols("required_sample_info"))
# column names from study table
study_cols = set(get_table_cols("study"))

Expand Down Expand Up @@ -310,9 +304,9 @@ def _parse_study_search_string(self, searchstr,
meta_header_type_lookup[header] = 'varchar'

# create the study finding SQL
# remove metadata headers that are in required_sample_info table
meta_headers = tuple(meta_headers.difference(
self.required_cols).difference(self.study_cols))
# remove metadata headers that are in study table
meta_headers.discard('sample_id')
meta_headers = tuple(meta_headers.difference(self.study_cols))

# get all study ids that contain all metadata categories searched for
sql = []
Expand Down Expand Up @@ -341,17 +335,17 @@ def _parse_study_search_string(self, searchstr,
# build the sql formatted list of metadata headers
header_info = []
for meta in meta_header_type_lookup:
if meta in self.required_cols:
header_info.append("r.%s" % meta)
elif meta in self.study_cols:
if meta in self.study_cols:
header_info.append("st.%s" % meta)
else:
header_info.append("sa.%s" % meta)
# build the SQL query
sample_sql = ("SELECT r.sample_id,%s FROM qiita.required_sample_info "
"r JOIN qiita.sample_{0} sa ON sa.sample_id = "
"r.sample_id JOIN qiita.study st ON st.study_id = "
"r.study_id WHERE %s" %

sample_sql = ("SELECT ss.sample_id,%s "
"FROM qiita.study_sample ss "
"JOIN qiita.sample_{0} sa USING (sample_id) "
"JOIN qiita.study st USING (study_id) "
"WHERE %s" %
(','.join(header_info), sql_where))
return study_sql, sample_sql, meta_header_type_lookup.keys()

Expand Down
75 changes: 40 additions & 35 deletions qiita_db/test/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@ def test_parse_study_search_string(self):
exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
"lower(column_name) = lower('altitude') and column_type "
"in ('integer', 'float8')")
exp_samp_sql = ("SELECT r.sample_id,sa.altitude FROM "
"qiita.required_sample_info r JOIN qiita.sample_{0} sa"
" ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
" st.study_id = r.study_id WHERE sa.altitude > 0")
exp_samp_sql = ("SELECT ss.sample_id,sa.altitude "
"FROM qiita.study_sample ss "
"JOIN qiita.sample_{0} sa USING (sample_id) "
"JOIN qiita.study st USING (study_id) "
"WHERE sa.altitude > 0")
self.assertEqual(st_sql, exp_st_sql)
self.assertEqual(samp_sql, exp_samp_sql)
self.assertEqual(meta, ["altitude"])
Expand All @@ -41,11 +42,11 @@ def test_parse_study_search_string(self):
exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
"lower(column_name) = lower('altitude') and column_type "
"in ('integer', 'float8')")
exp_samp_sql = ("SELECT r.sample_id,sa.altitude FROM "
"qiita.required_sample_info r JOIN qiita.sample_{0} sa"
" ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
" st.study_id = r.study_id WHERE NOT "
"sa.altitude > 0")
exp_samp_sql = ("SELECT ss.sample_id,sa.altitude "
"FROM qiita.study_sample ss "
"JOIN qiita.sample_{0} sa USING (sample_id) "
"JOIN qiita.study st USING (study_id) "
"WHERE NOT sa.altitude > 0")
self.assertEqual(st_sql, exp_st_sql)
self.assertEqual(samp_sql, exp_samp_sql)
self.assertEqual(meta, ["altitude"])
Expand All @@ -56,11 +57,11 @@ def test_parse_study_search_string(self):
exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
"lower(column_name) = lower('ph') and column_type in "
"('integer', 'float8')")
exp_samp_sql = ("SELECT r.sample_id,sa.ph FROM "
"qiita.required_sample_info r JOIN qiita.sample_{0} sa"
" ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
" st.study_id = r.study_id WHERE (sa.ph > 7 AND "
"sa.ph < 9)")
exp_samp_sql = ("SELECT ss.sample_id,sa.ph "
"FROM qiita.study_sample ss "
"JOIN qiita.sample_{0} sa USING (sample_id) "
"JOIN qiita.study st USING (study_id) "
"WHERE (sa.ph > 7 AND sa.ph < 9)")
self.assertEqual(st_sql, exp_st_sql)
self.assertEqual(samp_sql, exp_samp_sql)
self.assertEqual(meta, ["ph"])
Expand All @@ -71,11 +72,11 @@ def test_parse_study_search_string(self):
exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
"lower(column_name) = lower('ph') and column_type in "
"('integer', 'float8')")
exp_samp_sql = ("SELECT r.sample_id,sa.ph FROM "
"qiita.required_sample_info r JOIN qiita.sample_{0} sa"
" ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
" st.study_id = r.study_id WHERE (sa.ph > 7 OR "
"sa.ph < 9)")
exp_samp_sql = ("SELECT ss.sample_id,sa.ph "
"FROM qiita.study_sample ss "
"JOIN qiita.sample_{0} sa USING (sample_id) "
"JOIN qiita.study st USING (study_id) "
"WHERE (sa.ph > 7 OR sa.ph < 9)")
self.assertEqual(st_sql, exp_st_sql)
self.assertEqual(samp_sql, exp_samp_sql)
self.assertEqual(meta, ["ph"])
Expand All @@ -84,12 +85,15 @@ def test_parse_study_search_string(self):
st_sql, samp_sql, meta = \
self.search._parse_study_search_string(
'host_subject_id includes "Chicken little"')
exp_st_sql = "SELECT study_id FROM qiita.study_sample_columns"
exp_samp_sql = ("SELECT r.sample_id,r.host_subject_id FROM "
"qiita.required_sample_info r JOIN qiita.sample_{0} sa"
" ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
" st.study_id = r.study_id WHERE "
"LOWER(r.host_subject_id) LIKE '%chicken little%'")
exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns "
"WHERE lower(column_name) = lower('host_subject_id') "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no need to have lower('host_subject_id'), is there?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I.e., there's no need to call lower() on that string literal.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That SQL is actually built programmatically, and the builder lowercases everything it receives so that the search is case-insensitive.

"and column_type in ('varchar')")
exp_samp_sql = ("SELECT ss.sample_id,sa.host_subject_id "
"FROM qiita.study_sample ss "
"JOIN qiita.sample_{0} sa USING (sample_id) "
"JOIN qiita.study st USING (study_id) "
"WHERE LOWER(sa.host_subject_id) "
"LIKE '%chicken little%'")
self.assertEqual(st_sql, exp_st_sql)
self.assertEqual(samp_sql, exp_samp_sql)
self.assertEqual(meta, ["host_subject_id"])
Expand All @@ -104,11 +108,12 @@ def test_parse_study_search_string(self):
"lower(column_name) = lower('name') and column_type in "
"('varchar')")
exp_samp_sql = (
"SELECT r.sample_id,sa.name FROM qiita.required_sample_info r JOIN"
" qiita.sample_{0} sa ON sa.sample_id = r.sample_id JOIN "
"qiita.study st ON st.study_id = r.study_id WHERE (sa.name = "
"'Billy Bob' OR sa.name = 'Timmy' OR (sa.name = 'Jimbo' AND "
"sa.name > 25) OR sa.name < 5)")
"SELECT ss.sample_id,sa.name "
"FROM qiita.study_sample ss "
"JOIN qiita.sample_{0} sa USING (sample_id) "
"JOIN qiita.study st USING (study_id) "
"WHERE (sa.name = 'Billy Bob' OR sa.name = 'Timmy' OR "
"(sa.name = 'Jimbo' AND sa.name > 25) OR sa.name < 5)")
self.assertEqual(st_sql, exp_st_sql)
self.assertEqual(samp_sql, exp_samp_sql)
self.assertEqual(meta, ['name'])
Expand All @@ -124,11 +129,11 @@ def test_parse_study_search_string(self):
"('integer', 'float8')", "SELECT study_id FROM "
"qiita.study_sample_columns WHERE lower(column_name) = "
"lower('ph') and column_type in ('integer', 'float8')"]
exp_samp_sql = ("SELECT r.sample_id,sa.pH,sa.ph FROM "
"qiita.required_sample_info r JOIN qiita.sample_{0} sa"
" ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
" st.study_id = r.study_id WHERE (sa.ph > 7 OR "
"sa.ph < 9)")
exp_samp_sql = ("SELECT ss.sample_id,sa.pH,sa.ph "
"FROM qiita.study_sample ss "
"JOIN qiita.sample_{0} sa USING (sample_id) "
"JOIN qiita.study st USING (study_id) "
"WHERE (sa.ph > 7 OR sa.ph < 9)")
# use the split list to make sure the SQL is properly formed
self.assertEqual(len(st_sql), 2)
pos = exp_st_sql.index(st_sql[0])
Expand Down
8 changes: 4 additions & 4 deletions qiita_db/test/test_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ def test_study_raw_data(self):
self.assertEqual(get_count("qiita.study_raw_data"), 4)

def test_required_sample_info(self):
self.assertEqual(get_count("qiita.required_sample_info"), 27)
self.assertEqual(get_count("qiita.study_sample"), 27)

def test_study_sample_columns(self):
self.assertEqual(get_count("qiita.study_sample_columns"), 21)
self.assertEqual(get_count("qiita.study_sample_columns"), 30)

def test_sample_1(self):
self.assertEqual(get_count("qiita.sample_1"), 27)
Expand All @@ -66,10 +66,10 @@ def test_prep_template(self):
self.assertEqual(get_count("qiita.prep_template"), 1)

def test_common_prep_info(self):
self.assertEqual(get_count("qiita.common_prep_info"), 27)
self.assertEqual(get_count("qiita.prep_template_sample"), 27)

def test_prep_columns(self):
self.assertEqual(get_count("qiita.prep_columns"), 19)
self.assertEqual(get_count("qiita.prep_columns"), 22)

def test_prep_1(self):
self.assertEqual(get_count("qiita.prep_1"), 27)
Expand Down
27 changes: 2 additions & 25 deletions qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@
check_count, get_processed_params_tables,
params_dict_to_json, insert_filepaths,
get_db_files_base_dir, get_data_types,
get_required_sample_info_status,
get_emp_status, purge_filepaths, get_filepath_id,
purge_filepaths, get_filepath_id,
get_lat_longs, get_mountpoint,
get_mountpoint_path_by_id,
get_files_from_uploads_folders,
Expand Down Expand Up @@ -101,7 +100,7 @@ def test_get_lat_longs(self):
[38.2627021402, 3.48274264219]]

obs = get_lat_longs()
self.assertEqual(obs, exp)
self.assertItemsEqual(obs, exp)

def test_check_table_cols(self):
# Doesn't do anything if correct info passed, only errors if wrong info
Expand Down Expand Up @@ -236,28 +235,6 @@ def test_get_data_types(self):
exp = {v: k for k, v in exp.items()}
self.assertEqual(obs, exp)

def test_get_required_sample_info_status(self):
    """get_required_sample_info_status maps statuses and ids both ways"""
    status_to_id = {'received': 1, 'in_preparation': 2, 'running': 3,
                    'completed': 4}
    # Default key: the dict is keyed by status name
    self.assertEqual(get_required_sample_info_status(), status_to_id)

    # Keyed by id: the expected dict is the inverse of the mapping above
    id_to_status = dict((v, k) for k, v in status_to_id.items())
    self.assertEqual(
        get_required_sample_info_status(
            key='required_sample_info_status_id'),
        id_to_status)

def test_get_emp_status(self):
    """get_emp_status maps EMP statuses and ids both ways"""
    status_to_id = {'EMP': 1, 'EMP_Processed': 2, 'NOT_EMP': 3}
    # Default key: the dict is keyed by the emp_status name
    self.assertEqual(get_emp_status(), status_to_id)

    # Keyed by id: the expected dict is the inverse of the mapping above
    id_to_status = dict((v, k) for k, v in status_to_id.items())
    self.assertEqual(get_emp_status(key='emp_status_id'), id_to_status)

def test_get_count(self):
"""Checks that get_count retrieves proper count"""
self.assertEqual(get_count('qiita.study_person'), 3)
Expand Down
79 changes: 20 additions & 59 deletions qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,62 +212,6 @@ def get_data_types(key='data_type'):
return dict(con.execute_fetchall(sql))


def get_required_sample_info_status(key='status'):
    """Returns the possible required sample info statuses as a dict

    Parameters
    ----------
    key : {'status', 'required_sample_info_status_id'}, optional
        Column used as the key of the returned dict. Defaults to 'status'.

    Returns
    -------
    dict
        - {status: required_sample_info_status_id} if `key` is "status"
        - {required_sample_info_status_id: status} if `key` is
          "required_sample_info_status_id"

    Raises
    ------
    QiitaDBColumnError
        If `key` is neither 'status' nor 'required_sample_info_status_id'
    """
    con = SQLConnectionHandler()

    # Reject unsupported keys up front
    if key not in ('status', 'required_sample_info_status_id'):
        raise QiitaDBColumnError(
            "Unknown key. Pass either 'status' or "
            "'required_sample_info_status_id'")

    # The first selected column becomes the dict key, the second its value
    cols = ('status, required_sample_info_status_id' if key == 'status'
            else 'required_sample_info_status_id, status')

    query = 'select {} from qiita.required_sample_info_status'.format(cols)
    rows = con.execute_fetchall(query)
    return dict(rows)


def get_emp_status(key='emp_status'):
    """Gets the list of possible emp statuses

    Parameters
    ----------
    key : {'emp_status', 'emp_status_id'}, optional
        Defaults to 'emp_status'. Determines the format of the returned
        dict.

    Returns
    -------
    dict
        - If `key` is "emp_status", dict is of the form
          {emp_status: emp_status_id}
        - If `key` is "emp_status_id", dict is of the form
          {emp_status_id: emp_status}

    Raises
    ------
    QiitaDBColumnError
        If `key` is neither 'emp_status' nor 'emp_status_id'
    """
    con = SQLConnectionHandler()

    # Reject unsupported keys up front
    if key not in ('emp_status', 'emp_status_id'):
        raise QiitaDBColumnError(
            "Unknown key. Pass either 'emp_status' or "
            "'emp_status_id'")

    # The first selected column becomes the dict key, the second its value
    cols = ('emp_status, emp_status_id' if key == 'emp_status'
            else 'emp_status_id, emp_status')

    sql = 'select {} from qiita.emp_status'.format(cols)
    return dict(con.execute_fetchall(sql))


def create_rand_string(length, punct=True):
"""Returns a string of random ascii characters

Expand Down Expand Up @@ -1007,10 +951,27 @@ def get_processed_params_tables():


def get_lat_longs():
"""Retrieve the latitude and longitude of all the samples in the DB

Returns
-------
list of [float, float]
The latitude and longitude for each sample in the database
"""
conn = SQLConnectionHandler()
sql = """select latitude, longitude
from qiita.required_sample_info"""
return conn.execute_fetchall(sql)
sql = """SELECT DISTINCT table_name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor and non-blocking, but would you mind adding a docstring to this function?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

FROM information_schema.columns
WHERE SUBSTR(table_name, 1, 7) = 'sample_'
AND table_schema = 'qiita'
AND column_name IN ('latitude', 'longitude');"""
tables_gen = (t[0] for t in conn.execute_fetchall(sql))

sql = "SELECT latitude, longitude FROM qiita.{0}"
result = []
for table in tables_gen:
result.extend(conn.execute_fetchall(sql.format(table)))

return result


def get_environmental_packages(conn_handler=None):
Expand Down