Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 4 additions & 22 deletions qiita_db/metadata_template/base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@
from __future__ import division
from future.utils import viewitems, viewvalues
from future.builtins import zip
from os.path import join
from functools import partial
from itertools import chain
from copy import deepcopy

Expand Down Expand Up @@ -1077,26 +1075,10 @@ def add_filepath(self, filepath, fp_id=None):
def get_filepaths(self):
    r"""Retrieves the list of (filepath_id, filepath)

    The lookup is delegated to `qdb.util.retrieve_filepaths` so that the
    query and the path construction live in a single place instead of
    being re-implemented here.

    Returns
    -------
    list of (int, str)
        The filepath id and the filepath of every file attached to this
        template, sorted by filepath id in descending order (highest
        filepath id first)
    """
    with qdb.sql_connection.TRN:
        # `retrieve_filepaths` returns 3-tuples of
        # (filepath_id, filepath, filepath_type); the type is not part of
        # this method's return value, so it is dropped
        return [(fp_id, fp)
                for fp_id, fp, _ in qdb.util.retrieve_filepaths(
                    self._filepath_table, self._id_column, self.id,
                    sort='descendent')]

def categories(self):
"""Identifies the metadata columns present in a template
Expand Down
40 changes: 12 additions & 28 deletions qiita_db/metadata_template/prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,20 +498,13 @@ def create_qiime_mapping_file(self):
else:
new_cols = ['BarcodeSequence', 'LinkerPrimerSequence']

# getting the latest sample template
sql = """SELECT filepath_id, filepath
FROM qiita.filepath
JOIN qiita.sample_template_filepath
USING (filepath_id)
WHERE study_id=%s
ORDER BY filepath_id DESC"""
qdb.sql_connection.TRN.add(sql, [self.study_id])
# We know that the good filepath is the one in the first row
# because we sorted them in the SQL query
sample_template_fname = \
qdb.sql_connection.TRN.execute_fetchindex()[0][1]
_, fp = qdb.util.get_mountpoint('templates')[0]
sample_template_fp = join(fp, sample_template_fname)
# Retrieve the latest sample template
# Since we sorted the filepath retrieval, the first result contains
# the filepath that we want. `retrieve_filepaths` returns a
# 3-tuple, in which the fp is the second element
sample_template_fp = qdb.util.retrieve_filepaths(
"sample_template_filepath", "study_id", self.study_id,
sort='descendent')[0][1]

# reading files via pandas
st = qdb.metadata_template.util.load_template_to_dataframe(
Expand Down Expand Up @@ -610,20 +603,11 @@ def qiime_map_fp(self):
str
The filepath of the QIIME mapping file
"""
with qdb.sql_connection.TRN:
sql = """SELECT filepath_id, filepath
FROM qiita.filepath
JOIN qiita.{0} USING (filepath_id)
JOIN qiita.filepath_type USING (filepath_type_id)
WHERE {1} = %s AND filepath_type = 'qiime_map'
ORDER BY filepath_id DESC""".format(self._filepath_table,
self._id_column)
qdb.sql_connection.TRN.add(sql, [self._id])
# We know that the good filepath is the one in the first row
# because we sorted them in the SQL query
fn = qdb.sql_connection.TRN.execute_fetchindex()[0][1]
base_dir = qdb.util.get_mountpoint('templates')[0][1]
return join(base_dir, fn)
for _, fp, fp_type in qdb.util.retrieve_filepaths(
self._filepath_table, self._id_column, self.id,
sort='descendent'):
if fp_type == 'qiime_map':
return fp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there is no 'qiime_map', this returns None. Does this warrant a test case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That case should never happen (i.e. all prep template objects should have the qiime_map).
However, if it doesn't exist, I think returning None is the correct approach.


@property
def ebi_experiment_accessions(self):
Expand Down
16 changes: 16 additions & 0 deletions qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,22 @@ def test_retrieve_filepaths(self):
"raw_barcodes")]
self.assertEqual(obs, exp)

def test_retrieve_filepaths_sort(self):
    """``sort='descendent'`` returns the highest filepath id first"""
    raw_data_dir = join(qdb.util.get_db_files_base_dir(), "raw_data")
    expected = [
        (2,
         join(raw_data_dir, "1_s_G1_L001_sequences_barcodes.fastq.gz"),
         "raw_barcodes"),
        (1,
         join(raw_data_dir, "1_s_G1_L001_sequences.fastq.gz"),
         "raw_forward_seqs"),
    ]

    observed = qdb.util.retrieve_filepaths(
        'artifact_filepath', 'artifact_id', 1, sort='descendent')

    self.assertEqual(observed, expected)

def test_retrieve_filepaths_error(self):
    """An unrecognized ``sort`` value raises ``QiitaDBError``"""
    self.assertRaises(
        qdb.exceptions.QiitaDBError, qdb.util.retrieve_filepaths,
        'artifact_filepath', 'artifact_id', 1, sort='Unknown')

def _common_purge_filpeaths_test(self):
# Get all the filepaths so we can test if they've been removed or not
sql_fp = "SELECT filepath, data_directory_id FROM qiita.filepath"
Expand Down
18 changes: 16 additions & 2 deletions qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ def str_to_id(x):
chain.from_iterable(qdb.sql_connection.TRN.execute()[idx:])))


def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id):
def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id, sort=None):
"""Retrieves the filepaths for the given object id

Parameters
Expand All @@ -656,6 +656,9 @@ def retrieve_filepaths(obj_fp_table, obj_id_column, obj_id):
The name of the column that represents the object id
obj_id : int
The object id
sort : {'ascendent', 'descendent'}, optional
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather have {'ascendent', 'descendent'} or {'ascending', 'descending'}?

pandas uses the latter.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for retrieving the children graph (descendents) or parents graph (ascendents), not sorting, so not sure that terminology fits here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry, commenting on wrong PR, ignore above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call - I knew it sounded weird hahahaha - changing

The direction in which the results are sorted, using the filepath id
as sorting key. Default: None, no sorting is applied
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

applyed -> applied

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


Returns
-------
Expand All @@ -670,14 +673,25 @@ def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
else:
return join(db_dir, mountpoint, filepath)

sql_sort = ""
if sort == 'ascendent':
sql_sort = " ORDER BY filepath_id"
elif sort == 'descendent':
sql_sort = " ORDER BY filepath_id DESC"
elif sort is not None:
raise qdb.exceptions.QiitaDBError(
"Unknown sorting direction: %s. Please choose from 'ascendent' or "
"'descendent'" % sort)

with qdb.sql_connection.TRN:
sql = """SELECT filepath_id, filepath, filepath_type, mountpoint,
subdirectory
FROM qiita.filepath
JOIN qiita.filepath_type USING (filepath_type_id)
JOIN qiita.data_directory USING (data_directory_id)
JOIN qiita.{0} USING (filepath_id)
WHERE {1} = %s""".format(obj_fp_table, obj_id_column)
WHERE {1} = %s{2}""".format(obj_fp_table, obj_id_column,
sql_sort)
qdb.sql_connection.TRN.add(sql, [obj_id])
results = qdb.sql_connection.TRN.execute_fetchindex()
db_dir = get_db_files_base_dir()
Expand Down