Merge pull request #1099 from josenavas/fix-qiita-db-tests

squirrelo · squirrelo · commit b26a8676dfd0 · 2015-04-28T10:06:25.000-07:00
Fix _almost_ all qiita db tests
diff --git a/qiita_db/data.py b/qiita_db/data.py
@@ -369,9 +369,9 @@ def delete(cls, raw_data_id, study_id):
             """
             SELECT EXISTS(
                 SELECT * FROM qiita.prep_template AS pt
-                    LEFT JOIN qiita.common_prep_info AS cpi ON
+                    LEFT JOIN qiita.prep_template_sample AS cpi ON
                     (pt.prep_template_id=cpi.prep_template_id)
-                    LEFT JOIN qiita.required_sample_info AS rsi ON
+                    LEFT JOIN qiita.study_sample AS rsi ON
                     (cpi.sample_id=rsi.sample_id)
                 WHERE raw_data_id = {0} and study_id = {1}
             )
diff --git a/qiita_db/search.py b/qiita_db/search.py
@@ -116,8 +116,6 @@ def __repr__(self):
 
 
 class SearchTerm(object):
-    # column names from required_sample_info table
-    required_cols = set(get_table_cols("required_sample_info"))
     # column names from study table
     study_cols = set(get_table_cols("study"))
 
@@ -128,7 +126,7 @@ def __init__(self, tokens):
             self.term[pos] = scrub_data(term)
 
     def generate_sql(self):
-        # we can assume that the metadata is either in required_sample_info
+        # we can assume that the metadata is either in study_sample
         # or the study-specific table
         column_name, operator, argument = self.term
         argument_type = type(convert_type(argument))
@@ -140,9 +138,7 @@ def generate_sql(self):
         if operator not in allowable_types[argument_type]:
             raise QiitaDBIncompatibleDatatypeError(operator, argument_type)
 
-        if column_name in self.required_cols:
-            column_name = "r.%s" % column_name.lower()
-        elif column_name in self.study_cols:
+        if column_name in self.study_cols:
             column_name = "st.%s" % column_name.lower()
         else:
             column_name = "sa.%s" % column_name.lower()
@@ -167,8 +163,6 @@ def __repr__(self):
 class QiitaStudySearch(object):
     """QiitaStudySearch object to parse and run searches on studies."""
 
-    # column names from required_sample_info table
-    required_cols = set(get_table_cols("required_sample_info"))
     # column names from study table
     study_cols = set(get_table_cols("study"))
 
@@ -310,9 +304,9 @@ def _parse_study_search_string(self, searchstr,
                     meta_header_type_lookup[header] = 'varchar'
 
         # create the study finding SQL
-        # remove metadata headers that are in required_sample_info table
-        meta_headers = tuple(meta_headers.difference(
-            self.required_cols).difference(self.study_cols))
+        # remove metadata headers that are in study table
+        meta_headers.discard('sample_id')
+        meta_headers = tuple(meta_headers.difference(self.study_cols))
 
         # get all study ids that contain all metadata categories searched for
         sql = []
@@ -341,17 +335,17 @@ def _parse_study_search_string(self, searchstr,
         # build the sql formatted list of metadata headers
         header_info = []
         for meta in meta_header_type_lookup:
-            if meta in self.required_cols:
-                header_info.append("r.%s" % meta)
-            elif meta in self.study_cols:
+            if meta in self.study_cols:
                 header_info.append("st.%s" % meta)
             else:
                 header_info.append("sa.%s" % meta)
         # build the SQL query
-        sample_sql = ("SELECT r.sample_id,%s FROM qiita.required_sample_info "
-                      "r JOIN qiita.sample_{0} sa ON sa.sample_id = "
-                      "r.sample_id JOIN qiita.study st ON st.study_id = "
-                      "r.study_id WHERE %s" %
+
+        sample_sql = ("SELECT ss.sample_id,%s "
+                      "FROM qiita.study_sample ss "
+                      "JOIN qiita.sample_{0} sa USING (sample_id) "
+                      "JOIN qiita.study st USING (study_id) "
+                      "WHERE %s" %
                       (','.join(header_info), sql_where))
         return study_sql, sample_sql, meta_header_type_lookup.keys()
 
diff --git a/qiita_db/test/test_search.py b/qiita_db/test/test_search.py
@@ -27,10 +27,11 @@ def test_parse_study_search_string(self):
         exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                       "lower(column_name) = lower('altitude') and column_type "
                       "in ('integer', 'float8')")
-        exp_samp_sql = ("SELECT r.sample_id,sa.altitude FROM "
-                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
-                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
-                        " st.study_id = r.study_id WHERE sa.altitude > 0")
+        exp_samp_sql = ("SELECT ss.sample_id,sa.altitude "
+                        "FROM qiita.study_sample ss "
+                        "JOIN qiita.sample_{0} sa USING (sample_id) "
+                        "JOIN qiita.study st USING (study_id) "
+                        "WHERE sa.altitude > 0")
         self.assertEqual(st_sql, exp_st_sql)
         self.assertEqual(samp_sql, exp_samp_sql)
         self.assertEqual(meta, ["altitude"])
@@ -41,11 +42,11 @@ def test_parse_study_search_string(self):
         exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                       "lower(column_name) = lower('altitude') and column_type "
                       "in ('integer', 'float8')")
-        exp_samp_sql = ("SELECT r.sample_id,sa.altitude FROM "
-                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
-                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
-                        " st.study_id = r.study_id WHERE NOT "
-                        "sa.altitude > 0")
+        exp_samp_sql = ("SELECT ss.sample_id,sa.altitude "
+                        "FROM qiita.study_sample ss "
+                        "JOIN qiita.sample_{0} sa USING (sample_id) "
+                        "JOIN qiita.study st USING (study_id) "
+                        "WHERE NOT sa.altitude > 0")
         self.assertEqual(st_sql, exp_st_sql)
         self.assertEqual(samp_sql, exp_samp_sql)
         self.assertEqual(meta, ["altitude"])
@@ -56,11 +57,11 @@ def test_parse_study_search_string(self):
         exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                       "lower(column_name) = lower('ph') and column_type in "
                       "('integer', 'float8')")
-        exp_samp_sql = ("SELECT r.sample_id,sa.ph FROM "
-                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
-                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
-                        " st.study_id = r.study_id WHERE (sa.ph > 7 AND "
-                        "sa.ph < 9)")
+        exp_samp_sql = ("SELECT ss.sample_id,sa.ph "
+                        "FROM qiita.study_sample ss "
+                        "JOIN qiita.sample_{0} sa USING (sample_id) "
+                        "JOIN qiita.study st USING (study_id) "
+                        "WHERE (sa.ph > 7 AND sa.ph < 9)")
         self.assertEqual(st_sql, exp_st_sql)
         self.assertEqual(samp_sql, exp_samp_sql)
         self.assertEqual(meta, ["ph"])
@@ -71,11 +72,11 @@ def test_parse_study_search_string(self):
         exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                       "lower(column_name) = lower('ph') and column_type in "
                       "('integer', 'float8')")
-        exp_samp_sql = ("SELECT r.sample_id,sa.ph FROM "
-                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
-                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
-                        " st.study_id = r.study_id WHERE (sa.ph > 7 OR "
-                        "sa.ph < 9)")
+        exp_samp_sql = ("SELECT ss.sample_id,sa.ph "
+                        "FROM qiita.study_sample ss "
+                        "JOIN qiita.sample_{0} sa USING (sample_id) "
+                        "JOIN qiita.study st USING (study_id) "
+                        "WHERE (sa.ph > 7 OR sa.ph < 9)")
         self.assertEqual(st_sql, exp_st_sql)
         self.assertEqual(samp_sql, exp_samp_sql)
         self.assertEqual(meta, ["ph"])
@@ -84,12 +85,15 @@ def test_parse_study_search_string(self):
         st_sql, samp_sql, meta = \
             self.search._parse_study_search_string(
                 'host_subject_id includes "Chicken little"')
-        exp_st_sql = "SELECT study_id FROM qiita.study_sample_columns"
-        exp_samp_sql = ("SELECT r.sample_id,r.host_subject_id FROM "
-                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
-                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
-                        " st.study_id = r.study_id WHERE "
-                        "LOWER(r.host_subject_id) LIKE '%chicken little%'")
+        exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns "
+                      "WHERE lower(column_name) = lower('host_subject_id') "
+                      "and column_type in ('varchar')")
+        exp_samp_sql = ("SELECT ss.sample_id,sa.host_subject_id "
+                        "FROM qiita.study_sample ss "
+                        "JOIN qiita.sample_{0} sa USING (sample_id) "
+                        "JOIN qiita.study st USING (study_id) "
+                        "WHERE LOWER(sa.host_subject_id) "
+                        "LIKE '%chicken little%'")
         self.assertEqual(st_sql, exp_st_sql)
         self.assertEqual(samp_sql, exp_samp_sql)
         self.assertEqual(meta, ["host_subject_id"])
@@ -104,11 +108,12 @@ def test_parse_study_search_string(self):
             "lower(column_name) = lower('name') and column_type in "
             "('varchar')")
         exp_samp_sql = (
-            "SELECT r.sample_id,sa.name FROM qiita.required_sample_info r JOIN"
-            " qiita.sample_{0} sa ON sa.sample_id = r.sample_id JOIN "
-            "qiita.study st ON st.study_id = r.study_id WHERE (sa.name = "
-            "'Billy Bob' OR sa.name = 'Timmy' OR (sa.name = 'Jimbo' AND "
-            "sa.name > 25) OR sa.name < 5)")
+            "SELECT ss.sample_id,sa.name "
+            "FROM qiita.study_sample ss "
+            "JOIN qiita.sample_{0} sa USING (sample_id) "
+            "JOIN qiita.study st USING (study_id) "
+            "WHERE (sa.name = 'Billy Bob' OR sa.name = 'Timmy' OR "
+            "(sa.name = 'Jimbo' AND sa.name > 25) OR sa.name < 5)")
         self.assertEqual(st_sql, exp_st_sql)
         self.assertEqual(samp_sql, exp_samp_sql)
         self.assertEqual(meta, ['name'])
@@ -124,11 +129,11 @@ def test_parse_study_search_string(self):
                       "('integer', 'float8')", "SELECT study_id FROM "
                       "qiita.study_sample_columns WHERE lower(column_name) = "
                       "lower('ph') and column_type in ('integer', 'float8')"]
-        exp_samp_sql = ("SELECT r.sample_id,sa.pH,sa.ph FROM "
-                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
-                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
-                        " st.study_id = r.study_id WHERE (sa.ph > 7 OR "
-                        "sa.ph < 9)")
+        exp_samp_sql = ("SELECT ss.sample_id,sa.pH,sa.ph "
+                        "FROM qiita.study_sample ss "
+                        "JOIN qiita.sample_{0} sa USING (sample_id) "
+                        "JOIN qiita.study st USING (study_id) "
+                        "WHERE (sa.ph > 7 OR sa.ph < 9)")
         # use the split list to make sure the SQL is properly formed
         self.assertEqual(len(st_sql), 2)
         pos = exp_st_sql.index(st_sql[0])
diff --git a/qiita_db/test/test_setup.py b/qiita_db/test/test_setup.py
@@ -54,10 +54,10 @@ def test_study_raw_data(self):
         self.assertEqual(get_count("qiita.study_raw_data"), 4)
 
     def test_required_sample_info(self):
-        self.assertEqual(get_count("qiita.required_sample_info"), 27)
+        self.assertEqual(get_count("qiita.study_sample"), 27)
 
     def test_study_sample_columns(self):
-        self.assertEqual(get_count("qiita.study_sample_columns"), 21)
+        self.assertEqual(get_count("qiita.study_sample_columns"), 30)
 
     def test_sample_1(self):
         self.assertEqual(get_count("qiita.sample_1"), 27)
@@ -66,10 +66,10 @@ def test_prep_template(self):
         self.assertEqual(get_count("qiita.prep_template"), 1)
 
     def test_common_prep_info(self):
-        self.assertEqual(get_count("qiita.common_prep_info"), 27)
+        self.assertEqual(get_count("qiita.prep_template_sample"), 27)
 
     def test_prep_columns(self):
-        self.assertEqual(get_count("qiita.prep_columns"), 19)
+        self.assertEqual(get_count("qiita.prep_columns"), 22)
 
     def test_prep_1(self):
         self.assertEqual(get_count("qiita.prep_1"), 27)
diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py
@@ -26,8 +26,7 @@
                            check_count, get_processed_params_tables,
                            params_dict_to_json, insert_filepaths,
                            get_db_files_base_dir, get_data_types,
-                           get_required_sample_info_status,
-                           get_emp_status, purge_filepaths, get_filepath_id,
+                           purge_filepaths, get_filepath_id,
                            get_lat_longs, get_mountpoint,
                            get_mountpoint_path_by_id,
                            get_files_from_uploads_folders,
@@ -101,7 +100,7 @@ def test_get_lat_longs(self):
             [38.2627021402, 3.48274264219]]
 
         obs = get_lat_longs()
-        self.assertEqual(obs, exp)
+        self.assertItemsEqual(obs, exp)
 
     def test_check_table_cols(self):
         # Doesn't do anything if correct info passed, only errors if wrong info
@@ -236,28 +235,6 @@ def test_get_data_types(self):
         exp = {v: k for k, v in exp.items()}
         self.assertEqual(obs, exp)
 
-    def test_get_required_sample_info_status(self):
-        """Tests that get_required_sample_info_status works"""
-        obs = get_required_sample_info_status()
-        exp = {'received': 1, 'in_preparation': 2, 'running': 3,
-               'completed': 4}
-        self.assertEqual(obs, exp)
-
-        obs = get_required_sample_info_status(
-            key='required_sample_info_status_id')
-        exp = {v: k for k, v in exp.items()}
-        self.assertEqual(obs, exp)
-
-    def test_get_emp_status(self):
-        """Tests that get_emp_status works"""
-        obs = get_emp_status()
-        exp = {'EMP': 1, 'EMP_Processed': 2, 'NOT_EMP': 3}
-        self.assertEqual(obs, exp)
-
-        obs = get_emp_status(key='emp_status_id')
-        exp = {v: k for k, v in exp.items()}
-        self.assertEqual(obs, exp)
-
     def test_get_count(self):
         """Checks that get_count retrieves proper count"""
         self.assertEqual(get_count('qiita.study_person'), 3)
diff --git a/qiita_db/util.py b/qiita_db/util.py
@@ -212,62 +212,6 @@ def get_data_types(key='data_type'):
     return dict(con.execute_fetchall(sql))
 
 
-def get_required_sample_info_status(key='status'):
-    """Gets the list of possible required sample info status
-
-    Parameters
-    ----------
-    key : {'status', 'required_sample_info_status_id'}, optional
-        Defaults to 'status'. Determines the format of the returned dict.
-
-    Returns
-    -------
-    dict
-        - If `key` is "status", dict is of the form
-          {status: required_sample_info_status_id}
-        - If `key` is "required_sample_info_status_id", dict is of the form
-          {required_sample_info_status_id: status}
-    """
-    con = SQLConnectionHandler()
-    if key == 'status':
-        cols = 'status, required_sample_info_status_id'
-    elif key == 'required_sample_info_status_id':
-        cols = 'required_sample_info_status_id, status'
-    else:
-        raise QiitaDBColumnError("Unknown key. Pass either 'status' or "
-                                 "'required_sample_info_status_id'")
-    sql = 'select {} from qiita.required_sample_info_status'.format(cols)
-    return dict(con.execute_fetchall(sql))
-
-
-def get_emp_status(key='emp_status'):
-    """Gets the list of possible emp statuses
-
-    Parameters
-    ----------
-    key : {'emp_status', 'emp_status_id'}, optional
-        Defaults to 'status'. Determines the format of the returned dict.
-
-    Returns
-    -------
-    dict
-        - If `key` is "emp_status", dict is of the form
-          {emp_status: emp_status_id}
-        - If `key` is "emp_status_id", dict is of the form
-          {emp_status_id: emp_status}
-    """
-    con = SQLConnectionHandler()
-    if key == 'emp_status':
-        cols = 'emp_status, emp_status_id'
-    elif key == 'emp_status_id':
-        cols = 'emp_status_id, emp_status'
-    else:
-        raise QiitaDBColumnError("Unknown key. Pass either 'emp_status' or "
-                                 "'emp_status_id'")
-    sql = 'select {} from qiita.emp_status'.format(cols)
-    return dict(con.execute_fetchall(sql))
-
-
 def create_rand_string(length, punct=True):
     """Returns a string of random ascii characters
 
@@ -1007,10 +951,27 @@ def get_processed_params_tables():
 
 
 def get_lat_longs():
+    """Retrieve the latitude and longitude of all the samples in the DB
+
+    Returns
+    -------
+    list of [float, float]
+        The latitude and longitude for each sample in the database
+    """
     conn = SQLConnectionHandler()
-    sql = """select latitude, longitude
-             from qiita.required_sample_info"""
-    return conn.execute_fetchall(sql)
+    sql = """SELECT DISTINCT table_name
+             FROM information_schema.columns
+             WHERE SUBSTR(table_name, 1, 7) = 'sample_'
+                AND table_schema = 'qiita'
+                AND column_name IN ('latitude', 'longitude');"""
+    tables_gen = (t[0] for t in conn.execute_fetchall(sql))
+
+    sql = "SELECT latitude, longitude FROM qiita.{0}"
+    result = []
+    for table in tables_gen:
+        result.extend(conn.execute_fetchall(sql.format(table)))
+
+    return result
 
 
 def get_environmental_packages(conn_handler=None):