Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions qiita_pet/handlers/api_proxy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
ena_ontology_get_req, prep_template_samples_get_req,
new_prep_template_get_req)
from .studies import (
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req)
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req,
study_files_get_req)
from .artifact import (artifact_graph_get_req, artifact_types_get_req,
artifact_post_req, artifact_get_req,
artifact_status_put_req, artifact_delete_req)
Expand All @@ -44,4 +45,5 @@
'artifact_post_req', 'ena_ontology_get_req',
'sample_template_meta_cats_get_req',
'sample_template_samples_get_req', 'prep_template_samples_get_req',
'sample_template_category_get_req', 'new_prep_template_get_req']
'sample_template_category_get_req', 'new_prep_template_get_req',
'study_files_get_req']
55 changes: 41 additions & 14 deletions qiita_pet/handlers/api_proxy/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------
from os.path import join
from functools import partial

from future.utils import viewitems

from qiita_core.util import execute_as_transaction
from qiita_core.qiita_settings import qiita_config
Expand Down Expand Up @@ -73,8 +76,9 @@ def artifact_post_req(user_id, filepaths, artifact_type, name,
----------
user_id : str
User adding the atrifact
filepaths : dict of {str: [str, ...], ...}
List of files to attach to the artifact, keyed to the file type
filepaths : dict of {str: str}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure the description and the example match.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch - done

Comma-separated list of files to attach to the artifact,
keyed by file type
artifact_type : str
The type of the artifact
name : str
Expand All @@ -92,23 +96,46 @@ def artifact_post_req(user_id, filepaths, artifact_type, name,
"""
prep = PrepTemplate(int(prep_template_id))
study_id = prep.study_id

# First check if the user has access to the study
access_error = check_access(study_id, user_id)
if access_error:
return access_error

uploads_path = get_mountpoint('uploads')[0][1]
path_builder = partial(join, uploads_path, str(study_id))
cleaned_filepaths = []
for ftype in filepaths:
# Check if filepath being passed exists for study
for fp in filepaths[ftype]:
full_fp = join(uploads_path, str(study_id), fp)
exists = check_fp(study_id, full_fp)
if exists['status'] != 'success':
return {'status': 'error',
'message': 'File does not exist: %s' % fp}
cleaned_filepaths.append((full_fp, ftype))

artifact = Artifact.create(cleaned_filepaths, artifact_type, name=name,
prep_template=prep)
for ftype, file_list in viewitems(filepaths):
# JavaScript sends us this list as a comma-separated list
for fp in file_list.split(','):
# If the list was initially empty, the elemnt fp will the empty
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

element

# string, which will make the check below for "exists" to check
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you rewrite? I don't think it makes a lot of sense.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

# for the directory, instead than for the actual file
if fp:
# Check if filepath being passed exists for study
full_fp = path_builder(fp)
exists = check_fp(study_id, full_fp)
if exists['status'] != 'success':
return {'status': 'error',
'message': 'File does not exist: %s' % fp}
cleaned_filepaths.append((full_fp, ftype))

# This should never happen, but it doesn't hurt to actually have
# a explicit check, in case there is something odd with the JS
if not cleaned_filepaths:
return {'status': 'error',
'message': "Can't create artifact, no files provided."}

try:
artifact = Artifact.create(cleaned_filepaths, artifact_type, name=name,
prep_template=prep)
except Exception as e:
# We should hit this exception rarelly (that's why is an exception)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rarelly -> rarely
why is -> why it is

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

# since at this point we have done multiple checks. However, it can
# occur in weird cases, so better let the GUI know that this fail
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fail -> failed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

return {'status': 'error',
'message': "Error creating artifact: %s" % str(e)}

return {'status': 'success',
'message': '',
'artifact': artifact.id}
Expand Down
72 changes: 72 additions & 0 deletions qiita_pet/handlers/api_proxy/studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from __future__ import division

from qiita_db.study import Study
from qiita_db.metadata_template.prep_template import PrepTemplate
from qiita_db.util import (supported_filepath_types,
get_files_from_uploads_folders)
from qiita_pet.handlers.api_proxy.util import check_access


Expand Down Expand Up @@ -168,3 +171,72 @@ def study_prep_get_req(study_id, user_id):
return {'status': 'success',
'message': '',
'info': prep_info}


def study_files_get_req(study_id, prep_template_id, artifact_type):
"""Returns the uploaded files for the study id categorized by artifact_type

It retrieves the files uploaded for the given study and tries to do a
guess on how those files should be added to the artifact of the given
type. Uses information on the prep template to try to do a better guess.

Parameters
----------
study_id : int
The study id
prep_template_id : int
The prep template id
artifact_type : str
The artifact type

Returns
-------
dict of {str: object}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

{str: object} doesn't really match the below description.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The returned dictionary is keyed by strings, but the values to those keys can be either strings, int, or lists. In order to englobe all these types in the type description, I used the common base class object. Do you have a better alternative?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about: "see below"? No hard feelings so fine to leave it as is.

A dict of the form {'status': str,
'message': str,
'remaining': list of str,
'file_types': list of (str, bool, list of str),
'num_prefixes': int}
where 'status' is a string specifying whether the query is successfull,
'message' is a human-readable description of the error (optional),
'remaining' is the list of files that could not be categorized,
'file_types' is a list of the available filetypes, if it is required
or not and the list of categorized files for the given artifact type
and 'num_prefixes' is the number of different run prefix values in
the given prep template.
"""
supp_file_types = supported_filepath_types(artifact_type)
selected = []
remaining = []

uploaded = get_files_from_uploads_folders(study_id)
pt = PrepTemplate(prep_template_id).to_dataframe()

if (any(ft.startswith('raw_') for ft, _ in supp_file_types) and
'run_prefix' in pt.columns):
prep_prefixes = tuple(set(pt['run_prefix']))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about tuple(pt['run_prefixes'].unique())? Embracing pandas, not sure it's faster or anything, but the intent is clearer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pandas is ~2X slower so leaving as it is:

In [18]: %timeit tuple(pt['run_prefix'].unique())
10000 loops, best of 3: 19.3 µs per loop

In [19]: %timeit tuple(set(pt['run_prefix']))
100000 loops, best of 3: 9.15 µs per loop

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 ⏳

num_prefixes = len(prep_prefixes)
for _, filename in uploaded:
if filename.startswith(prep_prefixes):
selected.append(filename)
else:
remaining.append(filename)
else:
num_prefixes = 0
remaining = [f for _, f in uploaded]

# At this point we can't make anything smart about selecting by default
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make -> do

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

# the files for each type. The only thing that we can do is assume that
# the first in the supp_file_types list is the default one where files
# should be added in case of 'run_prefix' being present
file_types = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]]
first = supp_file_types[0]
# Note that this works even if `run_prefix` is not in the prep template
# because selected is initialized to the empty list
file_types.insert(0, (first[0], first[1], selected))

return {'status': 'success',
'message': '',
'remaining': remaining,
'file_types': file_types,
'num_prefixes': num_prefixes}
69 changes: 51 additions & 18 deletions qiita_pet/handlers/api_proxy/tests/test_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,12 @@ def setUp(self):
with open(self.update_fp, 'w') as f:
f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

self._files_to_remove = [self.update_fp]

def tearDown(self):
if exists(self.update_fp):
remove(self.update_fp)
for fp in self._files_to_remove:
if exists(fp):
remove(fp)

# Replace file if removed as part of function testing
uploads_path = get_mountpoint('uploads')[0][1]
Expand Down Expand Up @@ -144,35 +147,65 @@ def test_artifact_delete_req_no_access(self):

def test_artifact_post_req(self):
# Create new prep template to attach artifact to
new_prep_id = get_count('qiita.prep_template') + 1
npt.assert_warns(
pt = npt.assert_warns(
QiitaDBWarning, PrepTemplate.create,
pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S')
self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()])

new_artifact_id = get_count('qiita.artifact') + 1
filepaths = {'raw_forward_seqs': 'uploaded_file.txt',
'raw_barcodes': 'update.txt'}
obs = artifact_post_req(
'test@foo.bar', {'raw_forward_seqs': ['uploaded_file.txt'],
'raw_reverse_seqs': []},
'per_sample_FASTQ', 'New Test Artifact', new_prep_id)
'test@foo.bar', filepaths, 'FASTQ', 'New Test Artifact', pt.id)
exp = {'status': 'success',
'message': '',
'artifact': new_artifact_id}
self.assertEqual(obs, exp)
# Instantiate the artifact to make sure it was made
Artifact(new_artifact_id)

def test_artifact_post_req_bad_file(self):
# Create new prep template to attach artifact to
new_prep_id = get_count('qiita.prep_template') + 1
npt.assert_warns(
# Instantiate the artifact to make sure it was made and
# to clean the environment
a = Artifact(new_artifact_id)
self._files_to_remove.extend([fp for _, fp, _ in a.filepaths])

def test_artifact_post_req_error(self):
# Create a new prep template to attach the artifact to
pt = npt.assert_warns(
QiitaDBWarning, PrepTemplate.create,
pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S')
self._files_to_remove.extend([fp for _, fp in pt.get_filepaths()])

obs = artifact_post_req(
'test@foo.bar', {'raw_forward_seqs': ['NOEXIST']},
'per_sample_FASTQ', 'New Test Artifact', new_prep_id)
user_id = 'test@foo.bar'
filepaths = {'raw_barcodes': 'uploaded_file.txt',
'raw_forward_seqs': 'update.txt'}
artifact_type = "FASTQ"
name = "TestArtifact"

# The user doesn't have access to the study
obs = artifact_post_req("demo@microbio.me", filepaths, artifact_type,
name, pt.id)
exp = {'status': 'error',
'message': 'User does not have access to study'}
self.assertEqual(obs, exp)

# A file does not exist
missing_fps = {'raw_barcodes': 'NOTEXISTS'}
obs = artifact_post_req(user_id, missing_fps, artifact_type,
name, pt.id)
exp = {'status': 'error',
'message': 'File does not exist: NOTEXISTS'}
self.assertEqual(obs, exp)

# Cleaned filepaths is empty
empty_fps = {'raw_barcodes': '', 'raw_forward_seqs': ''}
obs = artifact_post_req(user_id, empty_fps, artifact_type, name, pt.id)
exp = {'status': 'error',
'message': "Can't create artifact, no files provided."}
self.assertEqual(obs, exp)

# Exception
obs = artifact_post_req(user_id, filepaths, artifact_type, name, 1)
exp = {'status': 'error',
'message': 'File does not exist: NOEXIST'}
'message': "Error creating artifact: Prep template 1 already "
"has an artifact associated"}
self.assertEqual(obs, exp)

def test_artifact_status_put_req(self):
Expand Down
14 changes: 13 additions & 1 deletion qiita_pet/handlers/api_proxy/tests/test_studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from qiita_core.util import qiita_test_checker
import qiita_db as qdb
from qiita_pet.handlers.api_proxy.studies import (
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req)
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req,
study_files_get_req)


@qiita_test_checker()
Expand Down Expand Up @@ -148,5 +149,16 @@ def test_study_delete_req_no_exists(self):
'message': 'Study does not exist'}
self.assertEqual(obs, exp)

def test_study_files_get_req(self):
obs = study_files_get_req(1, 1, 'FASTQ')
exp = {'status': 'success',
'message': '',
'remaining': ['uploaded_file.txt'],
'file_types': [('raw_barcodes', True, []),
('raw_forward_seqs', True, []),
('raw_reverse_seqs', False, [])],
'num_prefixes': 1}
self.assertEqual(obs, exp)

if __name__ == '__main__':
main()
4 changes: 2 additions & 2 deletions qiita_pet/handlers/study_handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from .ebi_handlers import EBISubmitHandler
from .vamps_handlers import VAMPSHandler
from .base import (StudyIndexHandler, StudyBaseInfoAJAX, StudyDeleteAjax,
DataTypesMenuAJAX)
DataTypesMenuAJAX, StudyFilesAJAX)
from .prep_template import (
PrepTemplateGraphAJAX, PrepTemplateAJAX, PrepFilesHandler,
NewPrepTemplateAjax)
Expand All @@ -32,4 +32,4 @@
'NewArtifactHandler', 'PrepFilesHandler', 'ProcessArtifactHandler',
'ListCommandsHandler', 'ListOptionsHandler', 'SampleAJAX',
'StudyDeleteAjax', 'ArtifactAJAX', 'NewPrepTemplateAjax',
'DataTypesMenuAJAX']
'DataTypesMenuAJAX', 'StudyFilesAJAX']
55 changes: 17 additions & 38 deletions qiita_pet/handlers/study_handlers/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@
from tornado.web import authenticated


from qiita_db.util import get_files_from_uploads_folders
from qiita_pet.handlers.util import to_int
from qiita_pet.handlers.base_handlers import BaseHandler
from qiita_pet.handlers.api_proxy import (
artifact_graph_get_req, artifact_types_get_req, data_types_get_req,
ena_ontology_get_req, prep_template_post_req, artifact_post_req,
artifact_graph_get_req, artifact_types_get_req, artifact_post_req,
artifact_status_put_req, artifact_get_req, artifact_delete_req)
from qiita_core.util import execute_as_transaction
from qiita_core.qiita_settings import qiita_config
Expand All @@ -32,48 +30,29 @@ def get(self):

class NewArtifactHandler(BaseHandler):
@authenticated
def get(self, study_id):
prep_files = [f for _, f in get_files_from_uploads_folders(study_id)
if f.endswith(('txt', 'tsv'))]
def get(self):
study_id = self.get_argument("study_id")
prep_id = self.get_argument("prep_template_id")
artifact_types = artifact_types_get_req()['types']
data_types = sorted(data_types_get_req()['data_types'])
ontology = ena_ontology_get_req()
self.render("study_ajax/add_prep_artifact.html", prep_files=prep_files,
artifact_types=artifact_types, data_types=data_types,
ontology=ontology, study_id=study_id)

self.render("study_ajax/add_artifact.html",
study_id=study_id, prep_id=prep_id,
artifact_types=artifact_types)

@authenticated
@execute_as_transaction
def post(self, study_id):
study_id = int(study_id)
def post(self):
artifact_type = self.get_argument('artifact-type')
name = self.get_argument('name')
data_type = self.get_argument('data-type')
ena_ontology = self.get_argument('ena-ontology', None)
user_ontology = self.get_argument('user-ontology', None)
new_ontology = self.get_argument('new-ontology', None)
artifact_type = self.get_argument('type')
prep_file = self.get_argument('prep-file')

# Remove known columns, leaving just file types and files
files = self.request.arguments
for arg in ['name', 'data-type', 'ena-ontology', 'user-ontology',
'new-ontology', 'type', 'prep-file']:
files.pop(arg, None)

prep = prep_template_post_req(study_id, self.current_user.id,
prep_file, data_type, ena_ontology,
user_ontology, new_ontology)
if prep['status'] == 'error':
self.write(prep)
return
prep_id = self.get_argument('prep-template-id')

# Request the rest of the arguments, which will be the files
files = {arg: self.get_argument(arg) for arg in self.request.arguments
if arg not in ['name', 'prep-template-id', 'artifact-type']}

artifact = artifact_post_req(
self.current_user.id, files, artifact_type, name, prep['id'])
if artifact['status'] == 'success' and prep['status'] != 'warning':
self.write({'status': 'success',
'message': 'Artifact created successfully'})
else:
self.write(prep)
self.current_user.id, files, artifact_type, name, prep_id)
self.write(artifact)


class ArtifactAJAX(BaseHandler):
Expand Down
Loading