|
37 | 37 |
|
38 | 38 | from __future__ import division
|
39 | 39 | from future.utils import viewitems, viewvalues
|
| 40 | +from future.builtins import zip |
40 | 41 | from os.path import join
|
41 | 42 | from functools import partial
|
42 | 43 | from collections import defaultdict
|
43 | 44 | from copy import deepcopy
|
44 | 45 |
|
45 | 46 | import pandas as pd
|
46 | 47 | from skbio.util import find_duplicates
|
| 48 | +import warnings |
47 | 49 |
|
48 | 50 | from qiita_core.exceptions import IncompetentQiitaDeveloperError
|
49 | 51 |
|
50 | 52 | from qiita_db.exceptions import (QiitaDBUnknownIDError, QiitaDBColumnError,
|
51 |
| - QiitaDBNotImplementedError, |
52 |
| - QiitaDBExecutionError, |
| 53 | + QiitaDBNotImplementedError, QiitaDBError, |
| 54 | + QiitaDBExecutionError, QiitaDBWarning, |
53 | 55 | QiitaDBDuplicateHeaderError)
|
54 | 56 | from qiita_db.base import QiitaObject
|
55 | 57 | from qiita_db.sql_connection import SQLConnectionHandler
|
56 |
| -from qiita_db.util import (exists_table, get_table_cols, |
57 |
| - convert_to_id, |
| 58 | +from qiita_db.util import (exists_table, get_table_cols, convert_to_id, |
58 | 59 | get_mountpoint, insert_filepaths)
|
59 | 60 | from qiita_db.logger import LogEntry
|
60 | 61 | from .util import (as_python_types, get_datatypes, get_invalid_sample_names,
|
@@ -772,6 +773,126 @@ def _add_common_creation_steps_to_queue(cls, md_template, obj_id,
|
772 | 773 | ', '.join(["%s"] * len(headers))),
|
773 | 774 | values, many=True)
|
774 | 775 |
|
def _add_common_extend_steps_to_queue(self, md_template, conn_handler,
                                      queue_name):
    r"""Adds the common extend steps to the queue in conn_handler

    Queues, in this order, the SQL statements needed to:

    1. register every column of `md_template` that is not yet in the
       template and add it to the template's dynamic table,
    2. fill those new columns for the samples that already exist,
    3. insert the samples that are not yet in the template.

    Values of columns that already exist are never modified for existing
    samples.

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by sample ids
    conn_handler : SQLConnectionHandler
        The connection handler object connected to the DB
    queue_name : str
        The queue where the SQL statements will be added

    Raises
    ------
    QiitaDBError
        If no new samples or new columns are present in `md_template`

    Warns
    -----
    QiitaDBWarning
        If `md_template` contains samples that already exist in the
        template
    """
    # Check if we are adding new samples. Sorted lists (rather than sets)
    # are used so the row selection below and the sample ids zipped with
    # the selected values are guaranteed to be in the same order.
    sample_ids = md_template.index.tolist()
    curr_samples = set(self.keys())
    existing_samples = sorted(curr_samples.intersection(sample_ids))
    new_samples = sorted(set(sample_ids).difference(curr_samples))

    # Check if we are adding new columns, by getting all the columns from
    # the database
    table_name = self._table_name(self._id)
    db_cols = get_table_cols(self._table, conn_handler)
    db_cols.remove('sample_id')
    db_cols.remove(self._id_column)
    curr_cols = set(
        get_table_cols(table_name, conn_handler)).union(db_cols)
    headers = md_template.keys().tolist()
    new_cols = sorted(set(headers).difference(curr_cols))

    if not new_cols and not new_samples:
        raise QiitaDBError(
            "No new samples or new columns found in the template. If you "
            "want to update existing values, you should use the 'update' "
            "functionality.")

    if new_cols:
        # If we are adding new columns, add them first (simplifies code).
        # new_cols is sorted to enforce an order.
        datatypes = get_datatypes(md_template.loc[:, new_cols])
        sql_cols = """INSERT INTO qiita.{0} ({1}, column_name, column_type)
                      VALUES (%s, %s, %s)""".format(self._column_table,
                                                    self._id_column)
        # NOTE(review): the column name and type are interpolated into
        # the statement because identifiers cannot be passed as query
        # parameters; this assumes the headers were validated upstream.
        # TODO confirm.
        sql_alter = """ALTER TABLE qiita.{0} ADD COLUMN {1} {2}"""
        for category, dtype in zip(new_cols, datatypes):
            conn_handler.add_to_queue(
                queue_name, sql_cols, (self._id, category, dtype))
            conn_handler.add_to_queue(
                queue_name, sql_alter.format(table_name, category, dtype))

        if existing_samples:
            warnings.warn(
                "No values have been modified for samples '%s'. However, "
                "the following columns have been added to them: '%s'"
                % (", ".join(existing_samples), ", ".join(new_cols)),
                QiitaDBWarning)
            # The values for the new columns are the only ones that get
            # added to the database. None of the existing values will be
            # modified (see update for that functionality)
            min_md_template = md_template.loc[existing_samples, new_cols]
            values = as_python_types(min_md_template, new_cols)
            values.append(existing_samples)
            # psycopg2 requires a list of tuples, in which each tuple is a
            # set of values to use in the string formatting of the query.
            # We have all the values in different lists (but in the same
            # order) so use zip to create the list of tuples that psycopg2
            # requires.
            values = list(zip(*values))
            set_str = ["{0} = %s".format(col) for col in new_cols]
            sql = """UPDATE qiita.{0}
                     SET {1}
                     WHERE sample_id=%s""".format(table_name,
                                                  ",".join(set_str))
            conn_handler.add_to_queue(queue_name, sql, values, many=True)
    elif existing_samples:
        warnings.warn(
            "The following samples already exist in the template and "
            "will be ignored: %s" % ", ".join(existing_samples),
            QiitaDBWarning)

    if new_samples:
        # At this point we only want the information from the new samples
        new_md = md_template.loc[new_samples]

        # Insert values on required columns. The column list and the
        # placeholders are built together so the statement stays valid
        # even when one of the lists of extra columns is empty.
        values = as_python_types(new_md, db_cols)
        values.insert(0, new_samples)
        values.insert(0, [self._id] * len(new_samples))
        # psycopg2 requires a list of tuples, in which each tuple is a
        # tuple of values to use in the string formatting of the query. We
        # have all the values in different lists (but in the same order) so
        # use zip to create the list of tuples that psycopg2 requires.
        values = list(zip(*values))
        required_cols = [self._id_column, 'sample_id'] + list(db_cols)
        sql = """INSERT INTO qiita.{0} ({1})
                 VALUES ({2})""".format(
            self._table, ', '.join(required_cols),
            ', '.join(['%s'] * len(required_cols)))
        conn_handler.add_to_queue(queue_name, sql, values, many=True)

        # Everything that is not a required column goes to the custom
        # (dynamic) table
        custom_headers = sorted(set(headers).difference(db_cols))

        # Insert values on custom table
        values = as_python_types(new_md, custom_headers)
        values.insert(0, new_samples)
        values = list(zip(*values))
        custom_cols = ['sample_id'] + custom_headers
        sql = """INSERT INTO qiita.{0} ({1})
                 VALUES ({2})""".format(
            table_name, ", ".join(custom_cols),
            ', '.join(["%s"] * len(custom_cols)))
        conn_handler.add_to_queue(queue_name, sql, values, many=True)
| 895 | + |
775 | 896 | @classmethod
|
776 | 897 | def exists(cls, obj_id):
|
777 | 898 | r"""Checks if already exists a MetadataTemplate for the provided object
|
@@ -1193,7 +1314,7 @@ def update_category(self, category, samples_and_values):
|
1193 | 1314 | """
|
1194 | 1315 | if not set(self.keys()).issuperset(samples_and_values):
|
1195 | 1316 | missing = set(self.keys()) - set(samples_and_values)
|
1196 |
| - table_name = self._table_name(self.study_id) |
| 1317 | + table_name = self._table_name(self._id) |
1197 | 1318 | raise QiitaDBUnknownIDError(missing, table_name)
|
1198 | 1319 |
|
1199 | 1320 | conn_handler = SQLConnectionHandler()
|
|
0 commit comments