Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions qiita_db/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from qiita_db.backends.dev import (DEVUser, DEVAnalysis, DEVStudy, DEVSample,
DEVJob)

BACKEND = "SQL"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should list (or create a list of) all the possible backends and verify that the default is within that list.


if BACKEND == "SQL":
UserStorage = SQLUser
AnalysisStorage = SQLAnalysis
Expand Down
15 changes: 15 additions & 0 deletions qiita_db/add_mapping_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env python
from __future__ import division

__author__ = "Jose Antonio Navas Molina"
__copyright__ = "Copyright 2013, The Qiita project"
__credits__ = ["Jose Antonio Navas Molina", "Adam Robbins-Pianka"]
__license__ = "BSD"
__version__ = "0.1.0-dev"
__maintainer__ = "Jose Antonio Navas Molina"
__email__ = "josenavasmolina@gmail.com"

from qiita_db.backends.sql.add_mapping_file import (add_mapping_file as
sql_add_mapping_file)

# Expose the SQL backend's implementation under this module's public name
# ``add_mapping_file``.
add_mapping_file = sql_add_mapping_file
10 changes: 5 additions & 5 deletions qiita_db/backends/dev/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
__email__ = "josenavasmolina@gmail.edu"
__status__ = "Development"

from qiita_db.backends.dev.user_storage.py import UserStorage as DEVUser
from qiita_db.backends.dev.analysis_storage.py import AnalysisStorage as DEVAnalysis
from qiita_db.backends.dev.study_storage.py import StudyStorage as DEVStudy
from qiita_db.backends.dev.sample_storage.py import SampleStorage as DEVSample
from qiita_db.backends.dev.job_storage.py import JobStorage as DEVJob
from qiita_db.backends.dev.user_storage import UserStorage as DEVUser
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's the purpose of having a dev folder?

from qiita_db.backends.dev.analysis_storage import AnalysisStorage as DEVAnalysis
from qiita_db.backends.dev.study_storage import StudyStorage as DEVStudy
from qiita_db.backends.dev.sample_storage import SampleStorage as DEVSample
from qiita_db.backends.dev.job_storage import JobStorage as DEVJob
10 changes: 5 additions & 5 deletions qiita_db/backends/fs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
__email__ = "josenavasmolina@gmail.edu"
__status__ = "Development"

from qiita_db.backends.fs.user_storage.py import UserStorage as FSUser
from qiita_db.backends.fs.analysis_storage.py import AnalysisStorage as FSAnalysis
from qiita_db.backends.fs.study_storage.py import StudyStorage as FSStudy
from qiita_db.backends.fs.sample_storage.py import SampleStorage as FSSample
from qiita_db.backends.fs.job_storage.py import JobStorage as FSJob
from qiita_db.backends.fs.user_storage import UserStorage as FSUser
from qiita_db.backends.fs.analysis_storage import AnalysisStorage as FSAnalysis
from qiita_db.backends.fs.study_storage import StudyStorage as FSStudy
from qiita_db.backends.fs.sample_storage import SampleStorage as FSSample
from qiita_db.backends.fs.job_storage import JobStorage as FSJob
10 changes: 5 additions & 5 deletions qiita_db/backends/sql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
__email__ = "josenavasmolina@gmail.edu"
__status__ = "Development"

from qiita_db.backends.sql.user_storage.py import UserStorage as SQLUser
from qiita_db.backends.sql.analysis_storage.py import AnalysisStorage as SQLAnalysis
from qiita_db.backends.sql.study_storage.py import StudyStorage as SQLStudy
from qiita_db.backends.sql.sample_storage.py import SampleStorage as SQLSample
from qiita_db.backends.sql.job_storage.py import JobStorage as SQLJob
from qiita_db.backends.sql.user_storage import UserStorage as SQLUser
from qiita_db.backends.sql.analysis_storage import AnalysisStorage as SQLAnalysis
from qiita_db.backends.sql.study_storage import StudyStorage as SQLStudy
from qiita_db.backends.sql.sample_storage import SampleStorage as SQLSample
from qiita_db.backends.sql.job_storage import JobStorage as SQLJob
158 changes: 158 additions & 0 deletions qiita_db/backends/sql/add_mapping_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#!/usr/bin/env python
from __future__ import division

__author__ = "Jose Antonio Navas Molina"
__copyright__ = "Copyright 2013, The Qiita project"
__credits__ = ["Jose Antonio Navas Molina", "Adam Robbins-Pianka"]
__license__ = "BSD"
__version__ = "0.1.0-dev"
__maintainer__ = "Jose Antonio Navas Molina"
__email__ = "josenavasmolina@gmail.com"

from itertools import izip
from qiita_db.backends.sql.exceptions import QiitaDBSQLExecutionError
from qiita_db.backends.sql.utils import (get_postgres_cursor, sql_execute,
sql_executemany)


def scrub_data(s):
    """Return ``s`` with every single-quote character (') removed.

    The single quote is the only character stripped; everything else is
    passed through untouched.
    """
    # Splitting on the quote and re-joining drops every occurrence.
    pieces = s.split("'")
    return "".join(pieces)


def quote_column_name(c):
    """Lowercase a column name and wrap it in double quotes.

    A double quote inside the name is doubled (``"`` -> ``""``), which is
    how SQL spells a literal double quote within a quoted identifier.
    Without this, a header containing ``"`` would end the identifier early
    and yield malformed (or injectable) SQL.

    Inputs:
        c: the column name (a string)

    Returns:
        The lowercased, escaped name surrounded by double quotes.
    """
    escaped = c.lower().replace('"', '""')
    return '"%s"' % escaped


def quote_data_value(c):
    """Return the input surrounded by single quotes.

    No escaping is performed on the value itself.
    """
    template = "'%s'"
    return template % c


def str_or_none(value):
    """Returns a string version of the input, or None

    The input counts as "no value" only when it is None, the empty string
    or the literal string 'None'; in those cases None is returned.

    Other falsy inputs such as 0 or 0.0 are real values and are converted
    (a plain truthiness test would wrongly discard them).
    """
    if value is None or value == '' or value == 'None':
        return None

    return str(value)


def int_or_none(value):
    """Returns a int version of the input, or None

    The input counts as "no value" only when it is None, the empty string
    or the literal string 'None'; in those cases None is returned.

    A numeric zero is a real value and is returned as 0 (a plain truthiness
    test would wrongly turn it into None).

    Raises whatever int() raises when the value cannot be converted.
    """
    if value is None or value == '' or value == 'None':
        return None

    return int(value)


def float_or_none(value):
    """Returns a float version of the input, or None

    The input counts as "no value" only when it is None, the empty string
    or the literal string 'None'; in those cases None is returned.

    A numeric zero is a real value and is returned as 0.0 (a plain
    truthiness test would wrongly turn it into None).

    Raises whatever float() raises when the value cannot be converted.
    """
    if value is None or value == '' or value == 'None':
        return None

    return float(value)


def add_mapping_file(study_id, mapping, headers, datatypes, clear_tables):
    """ Adds the mapping file to the SQL database

    Creates the table ``study_<study_id>`` with a ``sampleid`` column plus
    one column per header, records every column in ``column_tables`` and
    inserts one row per sample.

    Inputs:
        study_id: study id
        mapping: a dict of dicts representing the mapping file. Outer keys
            are sample names and inner keys are column headers.
        headers: a list of column headers
        datatypes: The datatypes of the columns, automatically determined
            to be varchar, int, or float
        clear_tables: if true, drop the study's table and delete the rows
            for that table from the column_tables table

    Raises:
        QiitaDBSQLExecutionError: presumably propagated from the SQL helpers
            when a statement fails (only the initial "drop table" failure
            is swallowed) -- confirm against sql_execute.
    """
    # Get the table name; column_tables stores it lowercased
    table_name = "study_%s" % study_id
    lc_table_name = table_name.lower()

    # Get the columns names in SQL safe
    sql_safe_column_names = [quote_column_name(h) for h in headers]

    # Get the postgres cursor to execute the queries
    cur = get_postgres_cursor()
    # try/finally guarantees the cursor is released even when a statement
    # below raises
    try:
        # if clear_tables is true, drop the study's table and delete the
        # rows for that table from the column_tables table
        if clear_tables:
            try:
                sql_execute(cur, 'drop table %s' % table_name, None)
            except QiitaDBSQLExecutionError:
                # The table did not already exist, but that's okay
                pass

            # Deleting rows from column_tables for the study. No try/except
            # needed: with no matching rows the query simply does nothing.
            # The lowercased name is used because that is the form the
            # insert further down stores.
            sql_execute(cur,
                        "delete from column_tables where table_name = %s",
                        (lc_table_name,))

        # Create a table for the study: sampleid plus one typed column per
        # header. Building the list first avoids a dangling comma when
        # there are no headers.
        column_defs = ['sampleid varchar']
        column_defs.extend('%s %s' % (column_name, datatype)
                           for column_name, datatype in
                           zip(sql_safe_column_names, datatypes))
        sql_execute(cur, "create table %s (%s)" %
                    (table_name, ", ".join(column_defs)), None)

        # Add rows to the column_tables table
        # NOTE(review): the column names are handed over as query arguments
        # already wrapped in single quotes; if sql_executemany binds
        # parameters, those quotes become part of the stored value --
        # confirm the intent. Behaviour is left unchanged here.
        column_tables_sql_template = ("insert into column_tables "
                                      "(column_name, table_name, datatype) "
                                      "values (%s, " +
                                      quote_data_value(lc_table_name) +
                                      ", %s)")
        sql_args_list = [(quote_data_value(h.lower()), datatype)
                         for h, datatype in zip(headers, datatypes)]
        sql_executemany(cur, column_tables_sql_template, sql_args_list)

        # Add rows into the study table. Values are passed as query
        # arguments instead of being spliced into the SQL text, so any '%'
        # in the identifier part must be doubled to stay literal.
        insert_columns = ', '.join(['sampleid'] + sql_safe_column_names)
        insert_prefix = 'insert into %s (%s)' % (table_name, insert_columns)
        placeholders = ', '.join(['%s'] * (len(headers) + 1))
        insert_sql = '%s values (%s)' % (insert_prefix.replace('%', '%%'),
                                         placeholders)

        for sample_id, data in mapping.items():
            # scrub_data is still applied so stored values stay identical to
            # what the previous string-built inserts produced
            row = [scrub_data(sample_id)]
            for header in headers:
                value = scrub_data(data[header])
                # Store 'None' as SQL null. This might be dangerous if a
                # mapping file actually has None as a valid data value!
                # (The sample id itself is never nulled.)
                row.append(None if value == 'None' else value)
            sql_execute(cur, insert_sql, tuple(row))
    finally:
        cur.close()
12 changes: 12 additions & 0 deletions qiita_db/backends/sql/connections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
__author__ = "Jose Antonio Navas Molina"
__copyright__ = "Copyright 2013, The Qiita project"
__credits__ = ["Jose Antonio Navas Molina", "Adam Robbins-Pianka"]
__license__ = "BSD"
__version__ = "0.1.0-dev"
__maintainer__ = "Jose Antonio Navas Molina"
__email__ = "josenavasmolina@gmail.com"

from psycopg2 import connect

# Connect to the database
# NOTE: the connection is opened as a side effect of importing this module,
# using the hard-coded user, database name and host given below.
postgres = connect(user='defaultuser', database='qiime_md', host='localhost')
24 changes: 24 additions & 0 deletions qiita_db/backends/sql/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
__author__ = "Jose Antonio Navas Molina"
__copyright__ = "Copyright 2013, The Qiita project"
__credits__ = ["Jose Antonio Navas Molina"]
__license__ = "BSD"
__version__ = "0.1.0-dev"
__maintainer__ = "Jose Antonio Navas Molina"
__email__ = "josenavasmolina@gmail.com"

from qiita_db.core.exceptions import QiitaDBError


class QiitaDBSQLError(QiitaDBError):
    """Root of the exception hierarchy for the Qiita-db SQL backend"""


class QiitaDBSQLExecutionError(QiitaDBSQLError):
    """Raised when executing a SQL query fails"""


class QiitaBDSQLParseError(QiitaDBSQLError):
    """Exception for error when parsing files

    Derives from QiitaDBSQLError (itself a QiitaDBError) so that catching
    the SQL backend's base error also covers parse failures. Handlers that
    catch QiitaDBError keep working unchanged.
    """


# Correctly spelled alias; the original "BD" name is kept for existing imports.
QiitaDBSQLParseError = QiitaBDSQLParseError
72 changes: 72 additions & 0 deletions qiita_db/backends/sql/parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
__author__ = "Jose Antonio Navas Molina"
__copyright__ = "Copyright 2013, The Qiita project"
__credits__ = ["Jose Antonio Navas Molina", "Adam Robbins-Pianka"]
__license__ = "BSD"
__version__ = "0.1.0-dev"
__maintainer__ = "Jose Antonio Navas Molina"
__email__ = "josenavasmolina@gmail.com"

from qiita_db.backends.sql.exceptions import QiitaBDSQLParseError


def parse_mapping_file_to_dicts(lines):
    """Parses a QIIME mapping file.

    Inputs:
        lines: Mapping file lines. May be an open file, a list of strings
            or any other iterable of lines (it is read once and does not
            need to support seek()).

    Returns:
        mapping: a dict of dicts representing the mapping file. Outer keys
            are sample names and inner keys are column headers.
        headers: A list of column headers
        datatypes: The datatypes of the columns, automatically determined
            to be varchar, int, or float

    Raises:
        QiitaBDSQLParseError: if no headers/data could be found, or if a
            data row has fewer values than there are headers.
    """
    # Read everything once so the two passes below work for any iterable,
    # not only for seekable file objects
    lines = list(lines)

    # Find first data line (neither a comment nor blank), assume the
    # previous comment line (i.e., the last comment line at the top of the
    # file) is the headers. The first field of that line labels the sample
    # id column and is dropped.
    headers = []
    prev_line = ''
    for line in lines:
        if line.startswith('#'):
            prev_line = line
        elif line.strip():
            headers = prev_line.strip().split('\t')[1:]
            break

    # if we get here and don't have headers, abort
    if not headers:
        raise QiitaBDSQLParseError("Empty mapping file! Aborting.")

    # read in the data (skip comment lines and blank lines; a blank line
    # would otherwise become a sample with an empty id and no values)
    mapping = {}
    for line in lines:
        if line.startswith('#') or not line.strip():
            continue
        elements = [e.strip() for e in line.split('\t')]
        sample_id, data = elements[0], elements[1:]
        if len(data) < len(headers):
            # Fail with a parse error here instead of a bare KeyError
            # during datatype detection
            raise QiitaBDSQLParseError(
                "Sample '%s' has %d values but %d columns were expected"
                % (sample_id, len(data), len(headers)))
        # zip() ignores any extra trailing fields beyond the headers
        mapping[sample_id] = dict(zip(headers, data))

    # determine datatypes
    datatypes = []
    for header in headers:
        column_data = [sample[header] for sample in mapping.values()]

        if all(c.isdigit() for c in column_data):
            datatype = 'int'
        elif all(c.replace('.', '', 1).isdigit() for c in column_data):
            # digits with at most one decimal point
            datatype = 'float'
        else:
            datatype = 'varchar'

        datatypes.append(datatype)

    return mapping, headers, datatypes
Loading