-
Couldn't load subscription status.
- Fork 79
Pyqi port #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pyqi port #10
Changes from 2 commits
2aff879
3badafc
f549adf
378857c
9a69250
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| #!/usr/bin/env python | ||
| from __future__ import division | ||
|
|
||
| __author__ = "Jose Antonio Navas Molina" | ||
| __copyright__ = "Copyright 2013, The Qiita project" | ||
| __credits__ = ["Jose Antonio Navas Molina", "Adam Robbins-Pianka"] | ||
| __license__ = "BSD" | ||
| __version__ = "0.1.0-dev" | ||
| __maintainer__ = "Jose Antonio Navas Molina" | ||
| __email__ = "josenavasmolina@gmail.com" | ||
|
|
||
# Re-export the SQL backend's implementation under the generic name, so
# callers of this module can use `add_mapping_file` without naming the
# backend. Presumably other backends would be wired in here — TODO confirm.
from qiita_db.backends.sql.add_mapping_file import (add_mapping_file as
                                                    sql_add_mapping_file)

# Public alias pointing at the currently-active (SQL) backend implementation.
add_mapping_file = sql_add_mapping_file
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,8 +9,8 @@ | |
| __email__ = "josenavasmolina@gmail.edu" | ||
| __status__ = "Development" | ||
|
|
||
| from qiita_db.backends.dev.user_storage.py import UserStorage as DEVUser | ||
| from qiita_db.backends.dev.analysis_storage.py import AnalysisStorage as DEVAnalysis | ||
| from qiita_db.backends.dev.study_storage.py import StudyStorage as DEVStudy | ||
| from qiita_db.backends.dev.sample_storage.py import SampleStorage as DEVSample | ||
| from qiita_db.backends.dev.job_storage.py import JobStorage as DEVJob | ||
| from qiita_db.backends.dev.user_storage import UserStorage as DEVUser | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what's the purpose of having a dev folder? |
||
| from qiita_db.backends.dev.analysis_storage import AnalysisStorage as DEVAnalysis | ||
| from qiita_db.backends.dev.study_storage import StudyStorage as DEVStudy | ||
| from qiita_db.backends.dev.sample_storage import SampleStorage as DEVSample | ||
| from qiita_db.backends.dev.job_storage import JobStorage as DEVJob | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,158 @@ | ||
| #!/usr/bin/env python | ||
| from __future__ import division | ||
|
|
||
| __author__ = "Jose Antonio Navas Molina" | ||
| __copyright__ = "Copyright 2013, The Qiita project" | ||
| __credits__ = ["Jose Antonio Navas Molina", "Adam Robbins-Pianka"] | ||
| __license__ = "BSD" | ||
| __version__ = "0.1.0-dev" | ||
| __maintainer__ = "Jose Antonio Navas Molina" | ||
| __email__ = "josenavasmolina@gmail.com" | ||
|
|
||
| from itertools import izip | ||
| from qiita_db.backends.sql.exceptions import QiitaDBSQLExecutionError | ||
| from qiita_db.backends.sql.utils import (get_postgres_cursor, sql_execute, | ||
| sql_executemany) | ||
|
|
||
|
|
||
def scrub_data(s):
    """Remove characters not allowed by PostgreSQL from *s*.

    Currently the only disallowed character is the single quote (').
    """
    return s.replace("'", "")
|
|
||
|
|
||
def quote_column_name(c):
    """Return *c* lowercased and wrapped in double quotes."""
    return '"{0}"'.format(c.lower())
|
|
||
|
|
||
def quote_data_value(c):
    """Return *c* wrapped in single quotes."""
    return "'{0}'".format(c)
|
|
||
|
|
||
def str_or_none(value):
    """Return a string version of the input, or None.

    A falsy value (e.g., empty string) or the literal string 'None'
    maps to None; anything else is passed through str().
    """
    if value and value != 'None':
        return str(value)
    return None
|
|
||
|
|
||
def int_or_none(value):
    """Return an int version of the input, or None.

    A falsy value (e.g., empty string) or the literal string 'None'
    maps to None; anything else is passed through int().
    """
    if value and value != 'None':
        return int(value)
    return None
|
|
||
|
|
||
def float_or_none(value):
    """Return a float version of the input, or None.

    A falsy value (e.g., empty string) or the literal string 'None'
    maps to None; anything else is passed through float().
    """
    if value and value != 'None':
        return float(value)
    return None
|
|
||
|
|
||
def add_mapping_file(study_id, mapping, headers, datatypes, clear_tables):
    """Adds the mapping file to the SQL database.

    Inputs:
        study_id: study id
        mapping: a dict of dicts representing the mapping file. Outer keys are
            sample names and inner keys are column headers.
        headers: a list of column headers
        datatypes: The datatypes of the columns, automatically determined to
            be varchar, int, or float
        clear_tables: if true, drop the study's table and delete the rows for
            that table from the column_tables table

    NOTE(review): most statements below are assembled via string
    interpolation rather than bound parameters (only the delete is
    parameterized). scrub_data() strips single quotes, which limits but
    does not eliminate SQL-injection risk for untrusted mapping values.
    """
    # Table name is derived from the study id: study_<id>
    table_name = "study_%s" % study_id

    # Get the postgres cursor to execute the queries
    cur = get_postgres_cursor()

    # if clear_tables is true, drop the study's table and delete the rows for
    # that table from the column_tables table
    if clear_tables:
        # Dropping table
        try:
            sql_execute(cur, 'drop table %s' % table_name, None)
        except QiitaDBSQLExecutionError:
            # The table did not already exist, but that's okay, just skip
            pass

        # Deleting rows from column_tables for the study
        # Do not need try/except here because the query should never fail;
        # even when there are no rows for this study, the query will
        # do nothing but complete successfully
        sql_execute(cur, "delete from column_tables where table_name = %s",
                    (table_name,))

    # Get the column names in SQL-safe (lowercased, double-quoted) form
    sql_safe_column_names = [quote_column_name(h) for h in headers]

    # Pair each column name with its datatype for the create-table statement.
    # zip works identically here on Python 2 and 3 (izip was Python-2-only).
    columns = []
    for column_name, datatype in zip(sql_safe_column_names, datatypes):
        columns.append('%s %s' % (column_name, datatype))
    # Get the columns in a comma-separated string
    columns = ", ".join(columns)
    # Create a table for the study
    sql_execute(cur, "create table %s (sampleid varchar, %s)" %
                (table_name, columns), None)

    # Bind str.lower to a local to avoid repeated attribute lookups in the
    # loops below
    lower = str.lower

    # Add rows to the column_tables table. The table name is pre-quoted and
    # baked into the template; column_name/datatype are filled per row.
    lc_table_name = lower(table_name)
    quoted_lc_table_name = quote_data_value(lc_table_name)
    column_tables_sql_template = ("insert into column_tables (column_name, "
                                  "table_name, datatype) values (%s, " +
                                  quoted_lc_table_name+", %s)")
    lc_headers = [lower(h) for h in headers]
    quoted_lc_headers = [quote_data_value(h) for h in lc_headers]
    sql_args_list = [(column_name, datatype) for column_name, datatype in
                     zip(quoted_lc_headers, datatypes)]
    sql_executemany(cur, column_tables_sql_template, sql_args_list)

    # Add rows into the study table, one insert per sample.
    # (A stray `sql_args_list = []` reset was removed here — it was never
    # used by this loop.)
    columns = ', '.join(sql_safe_column_names)
    insert_sql_template = ('insert into '+table_name+' (sampleid, ' +
                           columns+') values (%s)')

    for sample_id, data in mapping.items():
        values = [quote_data_value(scrub_data(sample_id))]
        values += [quote_data_value(scrub_data(data[header]))
                   for header in headers]

        values = ', '.join(values)

        # Replace 'None' with null. This might be dangerous if a mapping file
        # actually has None as a valid data value!
        values = values.replace(", 'None'", ", null")
        sql_execute(cur, insert_sql_template % values, None)

    cur.close()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| __author__ = "Jose Antonio Navas Molina" | ||
| __copyright__ = "Copyright 2013, The Qiita project" | ||
| __credits__ = ["Jose Antonio Navas Molina", "Adam Robbins-Pianka"] | ||
| __license__ = "BSD" | ||
| __version__ = "0.1.0-dev" | ||
| __maintainer__ = "Jose Antonio Navas Molina" | ||
| __email__ = "josenavasmolina@gmail.com" | ||
|
|
||
from psycopg2 import connect

# Module-level connection to the database, created as an import-time side
# effect and shared by the SQL backend.
# NOTE(review): user, database and host are hard-coded and there is no
# password/port handling — presumably fine for a local dev setup, but this
# should come from configuration; verify before deploying.
postgres = connect(user='defaultuser', database='qiime_md', host='localhost')
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| __author__ = "Jose Antonio Navas Molina" | ||
| __copyright__ = "Copyright 2013, The Qiita project" | ||
| __credits__ = ["Jose Antonio Navas Molina"] | ||
| __license__ = "BSD" | ||
| __version__ = "0.1.0-dev" | ||
| __maintainer__ = "Jose Antonio Navas Molina" | ||
| __email__ = "josenavasmolina@gmail.com" | ||
|
|
||
| from qiita_db.core.exceptions import QiitaDBError | ||
|
|
||
|
|
||
class QiitaDBSQLError(QiitaDBError):
    """Base class for all Qiita-db SQL backend errors"""
    pass


class QiitaDBSQLExecutionError(QiitaDBSQLError):
    """Exception for error when executing SQL queries"""
    pass


# NOTE(review): the class name transposes "DB" as "BD"; other modules import
# it under this exact name, so renaming it here would break callers.
# It now derives from QiitaDBSQLError so it participates in the SQL-backend
# exception hierarchy (previously it subclassed QiitaDBError directly,
# bypassing QiitaDBSQLError). This is backward-compatible: QiitaDBSQLError
# is itself a QiitaDBError subclass, so existing `except QiitaDBError`
# handlers still catch it.
class QiitaBDSQLParseError(QiitaDBSQLError):
    """Exception for error when parsing files"""
    pass
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| __author__ = "Jose Antonio Navas Molina" | ||
| __copyright__ = "Copyright 2013, The Qiita project" | ||
| __credits__ = ["Jose Antonio Navas Molina", "Adam Robbins-Pianka"] | ||
| __license__ = "BSD" | ||
| __version__ = "0.1.0-dev" | ||
| __maintainer__ = "Jose Antonio Navas Molina" | ||
| __email__ = "josenavasmolina@gmail.com" | ||
|
|
||
| from qiita_db.backends.sql.exceptions import QiitaBDSQLParseError | ||
|
|
||
|
|
||
def parse_mapping_file_to_dicts(lines):
    """Parses a QIIME mapping file.

    Inputs:
        lines: an open, seekable file-like object with the mapping file
            contents. The last comment ('#') line at the top of the file is
            taken as the header line.

    Returns:
        mapping: a dict of dicts representing the mapping file. Outer keys
            are sample names and inner keys are column headers.
        headers: a list of column headers (the leading sample-id column is
            excluded)
        datatypes: the datatypes of the columns, automatically determined
            to be varchar, int, or float

    Raises:
        QiitaBDSQLParseError: if no header line could be found

    Notes:
        - Blank lines are now ignored. Previously a trailing blank line
          produced a bogus empty sample id whose row had no columns, which
          raised KeyError during datatype detection.
        - Negative numbers contain '-', which str.isdigit rejects, so such
          columns are classified as varchar.
    """
    # bind to a local to avoid repeated attribute lookups in the loops below
    isdigit = str.isdigit

    # Find the first non-comment, non-blank line; assume the previous line
    # (i.e., the last comment line at the top of the file) is the headers
    headers = []
    prev_line = ''
    for line in lines:
        if not line.strip():
            # skip blank lines entirely
            continue
        if line.startswith('#'):
            prev_line = line
            continue
        headers = prev_line.strip().split('\t')[1:]
        break

    # if we get here and don't have headers, abort
    if not headers:
        raise QiitaBDSQLParseError("Empty mapping file! Aborting.")

    # seek back to the beginning of the file, and read in the data (skip
    # comment lines and blank lines)
    lines.seek(0)
    mapping = {}
    for line in lines:
        if line.startswith('#') or not line.strip():
            continue
        elements = [e.strip() for e in line.split('\t')]
        sample_id, data = elements[0], elements[1:]
        mapping[sample_id] = dict(zip(headers, data))

    # determine datatypes: a column is int when every value is all digits,
    # float when every value is all digits once a single '.' is removed,
    # varchar otherwise
    datatypes = []
    for header in headers:
        column_data = [mapping[sample_id][header] for sample_id in mapping]

        if all(isdigit(c) for c in column_data):
            datatype = 'int'
        elif all(isdigit(c.replace('.', '', 1)) for c in column_data):
            datatype = 'float'
        else:
            datatype = 'varchar'

        datatypes.append(datatype)

    return mapping, headers, datatypes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should list, or create a list of, all the possible backends and verify that the default is within that list.