Skip to content

Load processed data #129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 20, 2014
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add load_processed_data command and tests
  • Loading branch information
Adam Robbins-Pianka committed Jun 20, 2014
commit a0274e5bc3f2c49aa9abe1e991022aead4aa6ba8
43 changes: 42 additions & 1 deletion qiita_db/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from .study import Study, StudyPerson
from .user import User
from .util import get_filetypes, get_filepath_types
from .data import RawData
from .data import RawData, PreprocessedData, ProcessedData
from .metadata_template import SampleTemplate


Expand Down Expand Up @@ -108,3 +108,44 @@ def load_raw_data_cmd(filepaths, filepath_types, filetype, study_ids):

return RawData.create(filetype_id, list(zip(filepaths, filepath_types)),
studies)


def load_processed_data_cmd(fps, fp_types, processed_params_table_name,
                            processed_params_id, preprocessed_data_id=None,
                            processed_date=None):
    """Add a new processed data entry

    Parameters
    ----------
    fps : list of str
        Paths to the processed data files to associate with the ProcessedData
        object
    fp_types : list of str
        The types of files, one per fp
    processed_params_table_name : str
        The name of the processed_params_ table to use
    processed_params_id : int
        The ID of the row in the processed_params_ table
    preprocessed_data_id : int, optional
        Defaults to ``None``. The ID of the row in the preprocessed_data table.
    processed_date : datetime, optional
        Defaults to ``None``. The date and time to use as the processing date.

    Returns
    -------
    qiita_db.ProcessedData
        The newly created `qiita_db.ProcessedData` object

    Raises
    ------
    ValueError
        If the number of `fp_types` does not match the number of `fps`
    """
    # Validate before touching the database so a malformed call is cheap
    if len(fps) != len(fp_types):
        raise ValueError("Please pass exactly one fp_type for each "
                         "and every fp")

    # Translate each filepath type through the mapping returned by
    # get_filepath_types(); an unknown type surfaces as a lookup error here
    fp_types_dict = get_filepath_types()
    fp_types = [fp_types_dict[x] for x in fp_types]

    # Always bind the name: without this, omitting preprocessed_data_id (its
    # documented default) raised UnboundLocalError at the create() call below.
    # NOTE(review): assumes ProcessedData.create accepts None here -- confirm
    preprocessed_data = None
    if preprocessed_data_id is not None:
        preprocessed_data = PreprocessedData(preprocessed_data_id)

    return ProcessedData.create(processed_params_table_name,
                                processed_params_id, list(zip(fps, fp_types)),
                                preprocessed_data, processed_date)
52 changes: 51 additions & 1 deletion qiita_db/test/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
from configparser import NoOptionError

from qiita_db.commands import (make_study_from_cmd, load_raw_data_cmd,
sample_template_adder)
sample_template_adder, load_processed_data_cmd)
from qiita_db.study import Study, StudyPerson
from qiita_db.user import User
from qiita_db.util import get_count, check_count, get_db_files_base_dir
from qiita_db.data import PreprocessedData
from qiita_core.util import qiita_test_checker


Expand Down Expand Up @@ -148,6 +149,55 @@ def test_load_data_from_cmd(self):
study_ids)


@qiita_test_checker()
class TestLoadProcessedDataFromCmd(TestCase):
    """Tests for ``load_processed_data_cmd``."""

    def setUp(self):
        """Create a one-line temporary biom file and track it for cleanup."""
        handle, self.otu_table_fp = mkstemp(suffix='_otu_table.biom')
        close(handle)

        with open(self.otu_table_fp, "w") as out:
            out.write("\n")

        self.files_to_remove = [self.otu_table_fp]

        self.db_test_processed_data_dir = join(get_db_files_base_dir(),
                                               'processed_data')

    def tearDown(self):
        """Remove every tracked file that still exists."""
        for path in self.files_to_remove:
            if exists(path):
                remove(path)

    def test_load_processed_data_from_cmd(self):
        """One load adds one row to each table; bad input raises."""
        filepaths = [self.otu_table_fp]
        filepath_types = ['biom']

        # Record the row counts of the affected tables before loading
        tables = ['qiita.processed_data', 'qiita.processed_filepath',
                  'qiita.filepath']
        initial_counts = [(table, get_count(table)) for table in tables]

        new = load_processed_data_cmd(filepaths, filepath_types,
                                      'processed_params_uclust', 1, 1, None)
        processed_data_id = new.id
        copied_name = '%d_%s' % (processed_data_id,
                                 basename(self.otu_table_fp))
        self.files_to_remove.append(
            join(self.db_test_processed_data_dir, copied_name))

        # Each table must have gained exactly one row
        for table, before in initial_counts:
            self.assertTrue(check_count(table, before + 1))

        # Ensure that the ValueError is raised when a filepath_type is not
        # provided for each and every filepath
        with self.assertRaises(ValueError):
            load_processed_data_cmd(filepaths, [], 'processed_params_uclust',
                                    1, 1, None)


CONFIG_1 = """[required]
timeseries_type_id = 1
metadata_complete = True
Expand Down
34 changes: 32 additions & 2 deletions scripts/qiita_db
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
# -----------------------------------------------------------------------------

import click
from datetime import datetime

from qiita_db.util import get_filetypes, get_filepath_types
from qiita_db.util import (get_filetypes, get_filepath_types,
get_processed_params_tables)
from qiita_db.commands import (sample_template_adder, make_study_from_cmd,
load_raw_data_cmd)
load_raw_data_cmd, load_processed_data_cmd)


@click.group()
Expand All @@ -39,6 +41,34 @@ def load_raw_data(fp, fp_type, filetype, study):
load_raw_data_cmd(fp, fp_type, filetype, study)


def _parse_processed_date(value):
    """Convert a command-line string into a ``datetime``.

    Accepts ``YYYY-MM-DD``, ``YYYY-MM-DD HH:MM:SS`` and
    ``YYYY-MM-DDTHH:MM:SS``. Raises ``ValueError`` for anything else.
    """
    for fmt in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d'):
        try:
            return datetime.strptime(value, fmt)
        except ValueError:
            continue
    raise ValueError("Could not parse %r as a date; expected YYYY-MM-DD or "
                     "YYYY-MM-DD HH:MM:SS" % (value,))


@qiita_db.command()
@click.option('--fp', required=True, type=click.Path(resolve_path=True,
              readable=True, exists=True), multiple=True, help='Path to the '
              'processed data. This option can be used multiple times if '
              'there are multiple processed data files.')
@click.option('--fp_type', required=True, multiple=True, help='Describes the '
              'contents of the file. Pass one fp_type per fp.',
              type=click.Choice(get_filepath_types().keys()))
@click.option('--processed_params_table', required=True,
              type=click.Choice(get_processed_params_tables()),
              help='The table containing the processed parameters used to '
              'generate this file')
@click.option('--processed_params_id', required=True, type=int,
              help='The ID of the row in the processed_params table')
@click.option('--preprocessed_data_id', type=int, default=None, help='The '
              'ID of the row in the preprocessed_data table from which '
              'this processed data was created')
# type=datetime cannot work: click would call datetime("<string>"), which
# raises TypeError. Parse the string explicitly instead.
@click.option('--processed_date', type=_parse_processed_date, default=None,
              help='The date to use as the processed_date, formatted as '
              'YYYY-MM-DD or "YYYY-MM-DD HH:MM:SS". If None, then '
              'the current date and time will be used.')
def load_processed_data(fp, fp_type, processed_params_table,
                        processed_params_id, preprocessed_data_id,
                        processed_date):
    """Add processed data files to the database from the command line."""
    load_processed_data_cmd(fp, fp_type, processed_params_table,
                            processed_params_id, preprocessed_data_id,
                            processed_date)


@qiita_db.command()
@click.option('--owner', help="The email address of the owner of the study")
@click.option('--title', help="The title of the study")
Expand Down