Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions qiita_pet/handlers/api_proxy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
ena_ontology_get_req, prep_template_samples_get_req,
new_prep_template_get_req)
from .studies import (
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req)
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req,
study_files_get_req)
from .artifact import (artifact_graph_get_req, artifact_types_get_req,
artifact_post_req, artifact_get_req,
artifact_status_put_req, artifact_delete_req)
Expand All @@ -44,4 +45,5 @@
'artifact_post_req', 'ena_ontology_get_req',
'sample_template_meta_cats_get_req',
'sample_template_samples_get_req', 'prep_template_samples_get_req',
'sample_template_category_get_req', 'new_prep_template_get_req']
'sample_template_category_get_req', 'new_prep_template_get_req',
'study_files_get_req']
57 changes: 43 additions & 14 deletions qiita_pet/handlers/api_proxy/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------
from os.path import join
from functools import partial

from future.utils import viewitems

from qiita_core.util import execute_as_transaction
from qiita_core.qiita_settings import qiita_config
Expand Down Expand Up @@ -73,8 +76,9 @@ def artifact_post_req(user_id, filepaths, artifact_type, name,
----------
user_id : str
User adding the atrifact
filepaths : dict of {str: [str, ...], ...}
List of files to attach to the artifact, keyed to the file type
filepaths : str
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This description doesn't match "my inferred type", it seems like it really is a dictionary, not a string, am I missing something?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🍺 you're right

Comma-separated list of files to attach to the artifact,
keyed by file type
artifact_type : str
The type of the artifact
name : str
Expand All @@ -92,23 +96,48 @@ def artifact_post_req(user_id, filepaths, artifact_type, name,
"""
prep = PrepTemplate(int(prep_template_id))
study_id = prep.study_id

# First check if the user has access to the study
access_error = check_access(study_id, user_id)
if access_error:
return access_error

uploads_path = get_mountpoint('uploads')[0][1]
path_builder = partial(join, uploads_path, str(study_id))
cleaned_filepaths = []
for ftype in filepaths:
# Check if filepath being passed exists for study
for fp in filepaths[ftype]:
full_fp = join(uploads_path, str(study_id), fp)
exists = check_fp(study_id, full_fp)
if exists['status'] != 'success':
return {'status': 'error',
'message': 'File does not exist: %s' % fp}
cleaned_filepaths.append((full_fp, ftype))

artifact = Artifact.create(cleaned_filepaths, artifact_type, name=name,
prep_template=prep)
for ftype, file_list in viewitems(filepaths):
# JavaScript sends us this list as a comma-separated list
for fp in file_list.split(','):
# JavaScript will send this value as an empty string if the
# list of files was empty. In such case, the split will generate
# a single element containing the empty string. Check for that case
# here and, if fp is not the empty string, proceed to check if
# the file exists
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the description here, very useful. Just to check, there's no possible way to get commas in these filenames right? I'm wondering if what we should be getting should be a JSON object, but that I guess gets more in the topic of the REST API etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The serialization of that object happens in the HTML serialization of the form. I'm unsure why javascript (or HTML) is not serializing that list as a JSON object and self.argument is not unwrapping it like that, but that is why I'm receiving in the handler. Agree that we should totally revisit for a REST API

if fp:
# Check if filepath being passed exists for study
full_fp = path_builder(fp)
exists = check_fp(study_id, full_fp)
if exists['status'] != 'success':
return {'status': 'error',
'message': 'File does not exist: %s' % fp}
cleaned_filepaths.append((full_fp, ftype))

# This should never happen, but it doesn't hurt to actually have
# a explicit check, in case there is something odd with the JS
if not cleaned_filepaths:
return {'status': 'error',
'message': "Can't create artifact, no files provided."}

try:
artifact = Artifact.create(cleaned_filepaths, artifact_type, name=name,
prep_template=prep)
except Exception as e:
# We should hit this exception rarelly (that's why is an exception)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rarelly -> rarely
why is -> why it is

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

# since at this point we have done multiple checks. However, it can
# occur in weird cases, so better let the GUI know that this fail
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fail -> failed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

return {'status': 'error',
'message': "Error creating artifact: %s" % str(e)}

return {'status': 'success',
'message': '',
'artifact': artifact.id}
Expand Down
72 changes: 72 additions & 0 deletions qiita_pet/handlers/api_proxy/studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from __future__ import division

from qiita_db.study import Study
from qiita_db.metadata_template.prep_template import PrepTemplate
from qiita_db.util import (supported_filepath_types,
get_files_from_uploads_folders)
from qiita_pet.handlers.api_proxy.util import check_access


Expand Down Expand Up @@ -168,3 +171,72 @@ def study_prep_get_req(study_id, user_id):
return {'status': 'success',
'message': '',
'info': prep_info}


def study_files_get_req(study_id, prep_template_id, artifact_type):
"""Returns the uploaded files for the study id categorized by artifact_type

It retrieves the files uploaded for the given study and tries to do a
guess on how those files should be added to the artifact of the given
type. Uses information on the prep template to try to do a better guess.

Parameters
----------
study_id : int
The study id
prep_template_id : int
The prep template id
artifact_type : str
The artifact type

Returns
-------
dict of {str: object}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

{str: object} doesn't really match the below description.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The returned dictionary is keyed by strings, but the values to those keys can be either strings, int, or lists. In order to englobe all these types in the type description, I used the common base class object. Do you have a better alternative?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about: "see below"? No hard feelings so fine to leave it as is.

A dict of the form {'status': str,
'message': str,
'remaining': list of str,
'file_types': list of (str, bool, list of str),
'num_prefixes': int}
where 'status' is a string specifying whether the query is successfull,
'message' is a human-readable description of the error (optional),
'remaining' is the list of files that could not be categorized,
'file_types' is a list of the available filetypes, if it is required
or not and the list of categorized files for the given artifact type
and 'num_prefixes' is the number of different run prefix values in
the given prep template.
"""
supp_file_types = supported_filepath_types(artifact_type)
selected = []
remaining = []

uploaded = get_files_from_uploads_folders(study_id)
pt = PrepTemplate(prep_template_id).to_dataframe()

if (any(ft.startswith('raw_') for ft, _ in supp_file_types) and
'run_prefix' in pt.columns):
prep_prefixes = tuple(set(pt['run_prefix']))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about tuple(pt['run_prefixes'].unique())? Embracing pandas, not sure it's faster or anything, but the intent is clearer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pandas is ~2X slower so leaving as it is:

In [18]: %timeit tuple(pt['run_prefix'].unique())
10000 loops, best of 3: 19.3 µs per loop

In [19]: %timeit tuple(set(pt['run_prefix']))
100000 loops, best of 3: 9.15 µs per loop

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 ⏳

num_prefixes = len(prep_prefixes)
for _, filename in uploaded:
if filename.startswith(prep_prefixes):
selected.append(filename)
else:
remaining.append(filename)
else:
num_prefixes = 0
remaining = [f for _, f in uploaded]

# At this point we can't make anything smart about selecting by default
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make -> do

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

# the files for each type. The only thing that we can do is assume that
# the first in the supp_file_types list is the default one where files
# should be added in case of 'run_prefix' being present
file_types = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]]
first = supp_file_types[0]
# Note that this works even if `run_prefix` is not in the prep template
# because selected is initialized to the empty list
file_types.insert(0, (first[0], first[1], selected))

return {'status': 'success',
'message': '',
'remaining': remaining,
'file_types': file_types,
'num_prefixes': num_prefixes}
69 changes: 51 additions & 18 deletions qiita_pet/handlers/api_proxy/tests/test_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,12 @@ def setUp(self):
with open(self.update_fp, 'w') as f:
f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

self._files_to_remove = [self.update_fp]

def tearDown(self):
if exists(self.update_fp):
remove(self.update_fp)
for fp in self._files_to_remove:
if exists(fp):
remove(fp)

# Replace file if removed as part of function testing
uploads_path = get_mountpoint('uploads')[0][1]
Expand Down Expand Up @@ -144,35 +147,65 @@ def test_artifact_delete_req_no_access(self):

def test_artifact_post_req(self):
# Create new prep template to attach artifact to
new_prep_id = get_count('qiita.prep_template') + 1
npt.assert_warns(
pt = npt.assert_warns(
QiitaDBWarning, PrepTemplate.create,
pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S')
self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()])

new_artifact_id = get_count('qiita.artifact') + 1
filepaths = {'raw_forward_seqs': 'uploaded_file.txt',
'raw_barcodes': 'update.txt'}
obs = artifact_post_req(
'test@foo.bar', {'raw_forward_seqs': ['uploaded_file.txt'],
'raw_reverse_seqs': []},
'per_sample_FASTQ', 'New Test Artifact', new_prep_id)
'test@foo.bar', filepaths, 'FASTQ', 'New Test Artifact', pt.id)
exp = {'status': 'success',
'message': '',
'artifact': new_artifact_id}
self.assertEqual(obs, exp)
# Instantiate the artifact to make sure it was made
Artifact(new_artifact_id)

def test_artifact_post_req_bad_file(self):
# Create new prep template to attach artifact to
new_prep_id = get_count('qiita.prep_template') + 1
npt.assert_warns(
# Instantiate the artifact to make sure it was made and
# to clean the environment
a = Artifact(new_artifact_id)
self._files_to_remove.extend([fp for _, fp, _ in a.filepaths])

def test_artifact_post_req_error(self):
# Create a new prep template to attach the artifact to
pt = npt.assert_warns(
QiitaDBWarning, PrepTemplate.create,
pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S')
self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()])

obs = artifact_post_req(
'test@foo.bar', {'raw_forward_seqs': ['NOEXIST']},
'per_sample_FASTQ', 'New Test Artifact', new_prep_id)
user_id = 'test@foo.bar'
filepaths = {'raw_barcodes': 'uploaded_file.txt',
'raw_forward_seqs': 'update.txt'}
artifact_type = "FASTQ"
name = "TestArtifact"

# The user doesn't have access to the study
obs = artifact_post_req("demo@microbio.me", filepaths, artifact_type,
name, pt.id)
exp = {'status': 'error',
'message': 'User does not have access to study'}
self.assertEqual(obs, exp)

# A file does not exist
missing_fps = {'raw_barcodes': 'NOTEXISTS'}
obs = artifact_post_req(user_id, missing_fps, artifact_type,
name, pt.id)
exp = {'status': 'error',
'message': 'File does not exist: NOTEXISTS'}
self.assertEqual(obs, exp)

# Cleaned filepaths is empty
empty_fps = {'raw_barcodes': '', 'raw_forward_seqs': ''}
obs = artifact_post_req(user_id, empty_fps, artifact_type, name, pt.id)
exp = {'status': 'error',
'message': "Can't create artifact, no files provided."}
self.assertEqual(obs, exp)

# Exception
obs = artifact_post_req(user_id, filepaths, artifact_type, name, 1)
exp = {'status': 'error',
'message': 'File does not exist: NOEXIST'}
'message': "Error creating artifact: Prep template 1 already "
"has an artifact associated"}
self.assertEqual(obs, exp)

def test_artifact_status_put_req(self):
Expand Down
14 changes: 13 additions & 1 deletion qiita_pet/handlers/api_proxy/tests/test_studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from qiita_core.util import qiita_test_checker
import qiita_db as qdb
from qiita_pet.handlers.api_proxy.studies import (
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req)
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req,
study_files_get_req)


@qiita_test_checker()
Expand Down Expand Up @@ -148,5 +149,16 @@ def test_study_delete_req_no_exists(self):
'message': 'Study does not exist'}
self.assertEqual(obs, exp)

def test_study_files_get_req(self):
obs = study_files_get_req(1, 1, 'FASTQ')
exp = {'status': 'success',
'message': '',
'remaining': ['uploaded_file.txt'],
'file_types': [('raw_barcodes', True, []),
('raw_forward_seqs', True, []),
('raw_reverse_seqs', False, [])],
'num_prefixes': 1}
self.assertEqual(obs, exp)

if __name__ == '__main__':
main()
4 changes: 2 additions & 2 deletions qiita_pet/handlers/study_handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from .ebi_handlers import EBISubmitHandler
from .vamps_handlers import VAMPSHandler
from .base import (StudyIndexHandler, StudyBaseInfoAJAX, StudyDeleteAjax,
DataTypesMenuAJAX)
DataTypesMenuAJAX, StudyFilesAJAX)
from .prep_template import (
PrepTemplateGraphAJAX, PrepTemplateAJAX, PrepFilesHandler,
NewPrepTemplateAjax)
Expand All @@ -32,4 +32,4 @@
'NewArtifactHandler', 'PrepFilesHandler', 'ProcessArtifactHandler',
'ListCommandsHandler', 'ListOptionsHandler', 'SampleAJAX',
'StudyDeleteAjax', 'ArtifactAJAX', 'NewPrepTemplateAjax',
'DataTypesMenuAJAX']
'DataTypesMenuAJAX', 'StudyFilesAJAX']
Loading