Skip to content

Commit 8ce7089

Browse files
committed
Merge pull request #122 from teravest/preproccesed
Ready for MERGE: Add command to import preprocessed data
2 parents 55381ad + 1175b23 commit 8ce7089

File tree

4 files changed

+142
-16
lines changed

4 files changed

+142
-16
lines changed

qiita_db/commands.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
# The full license is in the file LICENSE, distributed with this software.
77
# -----------------------------------------------------------------------------
88

9+
from dateutil.parser import parse
10+
from os import listdir
11+
from os.path import join
912
from functools import partial
1013
try:
1114
# Python 2
@@ -15,7 +18,6 @@
1518
from configparser import ConfigParser
1619

1720
import pandas as pd
18-
from dateutil.parser import parse
1921

2022
from .study import Study, StudyPerson
2123
from .user import User
@@ -24,8 +26,19 @@
2426
from .metadata_template import SampleTemplate
2527

2628

27-
def make_study_from_cmd(owner, title, info):
29+
def load_study_from_cmd(owner, title, info):
30+
r"""Adds a study to the database
2831
32+
Parameters
33+
----------
34+
owner : str
35+
The email address of the owner of the study
36+
title : str
37+
The title of the study
38+
info : file-like object
39+
File-like object containing study information
40+
41+
"""
2942
# Parse the configuration file
3043
config = ConfigParser()
3144
config.readfp(info)
@@ -71,6 +84,35 @@ def make_study_from_cmd(owner, title, info):
7184
Study.create(User(owner), title, efo_ids, infodict)
7285

7386

87+
def load_preprocessed_data_from_cmd(study_id, filedir, filepathtype,
                                    params_table, params_id,
                                    submitted_to_insdc):
    r"""Registers the contents of a directory as preprocessed data

    Parameters
    ----------
    study_id : int
        Id of the study the preprocessed data belongs to
    filedir : str
        Path of the directory holding the preprocessed data; every entry
        returned by ``listdir(filedir)`` is registered
    filepathtype : str
        The filepath_type of the preprocessed data; used as a key into the
        mapping returned by ``get_filepath_types()``
    params_table : str
        Name of the table that contains the preprocessing parameters
    params_id : int
        Id of the parameters in `params_table`
    submitted_to_insdc : bool
        Whether the data has been submitted to INSDC

    Returns
    -------
    The value returned by ``PreprocessedData.create``

    Raises
    ------
    KeyError
        If `filepathtype` is not a key of ``get_filepath_types()``
    """
    # Translate the filepath type name into the value the database layer uses
    type_value = get_filepath_types()[filepathtype]

    # Pair each directory entry (joined with its directory) with that value
    path_entries = []
    for entry_name in listdir(filedir):
        path_entries.append((join(filedir, entry_name), type_value))

    study = Study(study_id)
    return PreprocessedData.create(study, params_table, params_id,
                                   path_entries,
                                   submitted_to_insdc=submitted_to_insdc)
114+
115+
74116
def sample_template_adder(sample_temp_path, study_id):
75117
r"""Adds a sample template to the database
76118
@@ -79,7 +121,7 @@ def sample_template_adder(sample_temp_path, study_id):
79121
sample_temp_path : str
80122
Path to the sample template file
81123
study_id : int
82-
The study id to wich the sample template belongs to
124+
The study id to which the sample template belongs
83125
"""
84126
sample_temp = pd.DataFrame.from_csv(sample_temp_path, sep='\t',
85127
infer_datetime_format=True)

qiita_db/test/test_commands.py

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
from os import remove, close
1010
from os.path import exists, join, basename
11-
from tempfile import mkstemp
11+
from tempfile import mkstemp, mkdtemp
12+
from shutil import rmtree
1213
from unittest import TestCase, main
1314
from future.utils.six import StringIO
1415
try:
@@ -18,12 +19,12 @@
1819
# Python 3
1920
from configparser import NoOptionError
2021

21-
from qiita_db.commands import (make_study_from_cmd, load_raw_data_cmd,
22-
sample_template_adder, load_processed_data_cmd)
22+
from qiita_db.commands import (load_study_from_cmd, load_raw_data_cmd,
23+
sample_template_adder, load_processed_data_cmd,
24+
load_preprocessed_data_from_cmd)
2325
from qiita_db.study import Study, StudyPerson
2426
from qiita_db.user import User
2527
from qiita_db.util import get_count, check_count, get_db_files_base_dir
26-
from qiita_db.data import PreprocessedData
2728
from qiita_core.util import qiita_test_checker
2829

2930

@@ -38,7 +39,7 @@ def setUp(self):
3839

3940
def test_make_study_from_cmd(self):
4041
fh = StringIO(self.config1)
41-
make_study_from_cmd('test@test.com', 'newstudy', fh)
42+
load_study_from_cmd('test@test.com', 'newstudy', fh)
4243
sql = ("select study_id from qiita.study where email = %s and "
4344
"study_title = %s")
4445
study_id = self.conn_handler.execute_fetchone(sql, ('test@test.com',
@@ -47,7 +48,52 @@ def test_make_study_from_cmd(self):
4748

4849
fh2 = StringIO(self.config2)
4950
with self.assertRaises(NoOptionError):
50-
make_study_from_cmd('test@test.com', 'newstudy2', fh2)
51+
load_study_from_cmd('test@test.com', 'newstudy2', fh2)
52+
53+
54+
@qiita_test_checker()
class TestImportPreprocessedData(TestCase):
    """Exercises load_preprocessed_data_from_cmd on a temporary directory"""

    def setUp(self):
        # Scratch directory with two files, each containing a single newline
        self.tmpdir = mkdtemp()
        handle, self.file1 = mkstemp(dir=self.tmpdir)
        close(handle)
        handle, self.file2 = mkstemp(dir=self.tmpdir)
        close(handle)
        for path in (self.file1, self.file2):
            with open(path, "w") as out:
                out.write("\n")

        # Everything listed here is deleted again in tearDown
        self.files_to_remove = [self.file1, self.file2]
        self.dirs_to_remove = [self.tmpdir]

        self.db_test_ppd_dir = join(get_db_files_base_dir(),
                                    'preprocessed_data')

    def tearDown(self):
        # Files first, then directories; paths that no longer exist are
        # skipped
        for file_path in self.files_to_remove:
            if not exists(file_path):
                continue
            remove(file_path)
        for dir_path in self.dirs_to_remove:
            if not exists(dir_path):
                continue
            rmtree(dir_path)

    def test_import_preprocessed_data(self):
        ppd_before = get_count('qiita.preprocessed_data')
        fp_before = get_count('qiita.filepath')

        ppd = load_preprocessed_data_from_cmd(
            1, self.tmpdir, 'tar', 'preprocessed_sequence_illumina_params',
            1, False)

        # Schedule cleanup of "<ppd id>_<basename>" under the preprocessed
        # data directory -- presumably where the import places its copies
        for source in (self.file1, self.file2):
            db_copy = join(self.db_test_ppd_dir,
                           '%d_%s' % (ppd.id, basename(source)))
            self.files_to_remove.append(db_copy)

        self.assertEqual(ppd.id, 3)
        # One new preprocessed_data row and one filepath row per input file
        self.assertTrue(check_count('qiita.preprocessed_data',
                                    ppd_before + 1))
        self.assertTrue(check_count('qiita.filepath', fp_before + 2))
5197

5298

5399
@qiita_test_checker()

qiita_db/util.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,19 @@ def check_count(table, exp_count):
469469
return obs_count == exp_count
470470

471471

472+
def get_preprocessed_params_tables():
    """Returns the names of the tables starting with "preprocessed_"

    Returns
    -------
    list of str
        The third column of every matching ``information_schema.tables``
        row (``table_name`` in the standard column layout)

    Notes
    -----
    NOTE(review): the filter is a plain 13-character prefix match on the
    ``qiita`` schema, so it presumably also returns non-parameter tables
    such as ``preprocessed_data`` -- confirm whether those should be
    excluded.
    """
    query = ("SELECT * FROM information_schema.tables WHERE table_schema = "
             "'qiita' AND SUBSTR(table_name, 1, 13) = 'preprocessed_'")
    rows = SQLConnectionHandler().execute_fetchall(query)

    table_names = []
    for row in rows:
        # SELECT * is used, so the name is picked by column position
        table_names.append(row[2])
    return table_names
483+
484+
472485
def get_processed_params_tables():
473486
"""Returns a list of all tables starting with "processed_params_"
474487

scripts/qiita_db

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111
import click
1212

1313
from qiita_db.util import (get_filetypes, get_filepath_types,
14-
get_processed_params_tables)
15-
from qiita_db.commands import (sample_template_adder, make_study_from_cmd,
16-
load_raw_data_cmd, load_processed_data_cmd)
14+
get_processed_params_tables,
15+
get_preprocessed_params_tables)
16+
from qiita_db.commands import (sample_template_adder, load_study_from_cmd,
17+
load_raw_data_cmd, load_processed_data_cmd,
18+
load_preprocessed_data_from_cmd)
1719

1820

1921
@click.group()
@@ -62,8 +64,8 @@ def load_raw_data(fp, fp_type, filetype, study):
6264
'interpretable as a datetime. If None, then the current date '
6365
'and time will be used.')
6466
def load_processed_data(fp, fp_type, processed_params_table,
65-
processed_params_id, preprocessed_data_id,
66-
processed_date):
67+
processed_params_id, preprocessed_data_id,
68+
processed_date):
6769
load_processed_data_cmd(fp, fp_type, processed_params_table,
6870
processed_params_id, preprocessed_data_id,
6971
processed_date)
@@ -75,8 +77,31 @@ def load_processed_data(fp, fp_type, processed_params_table,
7577
@click.option('--info', type=click.File(mode='r'),
7678
help="filepath of file with study information in python"
7779
"config file format")
78-
def insert_study_to_db(owner, title, info):
79-
make_study_from_cmd(owner, title, info)
80+
def load_study(owner, title, info):
81+
load_study_from_cmd(owner, title, info)
82+
83+
84+
# CLI entry point that registers every file in a directory as preprocessed
# data by delegating to load_preprocessed_data_from_cmd.
#
# Documented with comments rather than a docstring because click would
# surface a docstring as the command's --help text.
#
# Note: both click.Choice(...) arguments below are evaluated when this
# module is loaded, i.e. get_preprocessed_params_tables() and
# get_filepath_types() are called at import time.
@qiita_db.command()
# study_id and params_id are converted to int to match the contract
# documented by load_preprocessed_data_from_cmd.
@click.option('--study_id', help="Study id associated with data",
              required=True, type=int)
# The flag stays --params_table; the explicit second name binds its value to
# the function's params_table_name argument. Without it click passes the
# value as ``params_table`` and the call fails with a TypeError.
@click.option('--params_table', 'params_table_name',
              help="Name of the paramaters table for the "
              "preprocessed data", required=True,
              type=click.Choice(get_preprocessed_params_tables()))
@click.option('--filedir', help="Directory containing preprocessed data",
              required=True)
@click.option('--filepathtype', help="Describes the contents of the input "
              "files", required=True,
              type=click.Choice(get_filepath_types().keys()))
@click.option('--params_id', required=True, type=int,
              help="id in the paramater table associated with the parameters")
# A boolean flag is False when omitted, so it must not be marked required;
# "If provided" in the help text describes an optional switch.
@click.option('--submitted_to_insdc', is_flag=True,
              help="If provided, the preprocessed data have been submitted"
              " to insdc")
def load_preprocessed_data(study_id, filedir, filepathtype,
                           params_table_name, params_id, submitted_to_insdc):
    load_preprocessed_data_from_cmd(study_id, filedir, filepathtype,
                                    params_table_name,
                                    params_id, submitted_to_insdc)
80105

81106

82107
@qiita_db.command()

0 commit comments

Comments
 (0)