35
35
>>> print rd.id # doctest: +SKIP
36
36
2
37
37
38
- Retrieve if the raw data files have been submitted to insdc
39
-
40
- >>> rd.is_submitted_to_insdc() # doctest: +SKIP
41
- False
42
-
43
38
Retrieve the filepaths associated with the raw data
44
39
45
40
>>> rd.get_filepaths() # doctest: +SKIP
91
86
from .base import QiitaObject
92
87
from .sql_connection import SQLConnectionHandler
93
88
from .util import (exists_dynamic_table , get_db_files_base_dir ,
94
- compute_checksum , insert_filepaths )
89
+ insert_filepaths )
95
90
96
91
97
92
class BaseData (QiitaObject ):
@@ -195,7 +190,6 @@ class RawData(BaseData):
195
190
Methods
196
191
-------
197
192
create
198
- is_submitted_to_insdc
199
193
200
194
See Also
201
195
--------
@@ -210,7 +204,7 @@ class RawData(BaseData):
210
204
_study_raw_table = "study_raw_data"
211
205
212
206
@classmethod
213
- def create (cls , filetype , filepaths , studies , submitted_to_insdc = False ):
207
+ def create (cls , filetype , filepaths , studies ):
214
208
r"""Creates a new object with a new id on the storage system
215
209
216
210
Parameters
@@ -221,8 +215,6 @@ def create(cls, filetype, filepaths, studies, submitted_to_insdc=False):
221
215
The list of paths to the raw files and its filepath type identifier
222
216
studies : list of Study
223
217
The list of Study objects to which the raw data belongs to
224
- submitted_to_insdc : bool
225
- If true, the raw data files have been submitted to insdc
226
218
227
219
Returns
228
220
-------
@@ -231,10 +223,9 @@ def create(cls, filetype, filepaths, studies, submitted_to_insdc=False):
231
223
# Add the raw data to the database, and get the raw data id back
232
224
conn_handler = SQLConnectionHandler ()
233
225
rd_id = conn_handler .execute_fetchone (
234
- "INSERT INTO qiita.{0} (filetype_id, submitted_to_insdc) VALUES "
235
- "(%(type_id)s, %(insdc)s) RETURNING "
236
- "raw_data_id" .format (cls ._table ), {'type_id' : filetype ,
237
- 'insdc' : submitted_to_insdc })[0 ]
226
+ "INSERT INTO qiita.{0} (filetype_id) VALUES (%s) RETURNING "
227
+ "raw_data_id" .format (cls ._table ),
228
+ (filetype , ))[0 ]
238
229
rd = cls (rd_id )
239
230
240
231
# Connect the raw data with its studies
@@ -247,19 +238,6 @@ def create(cls, filetype, filepaths, studies, submitted_to_insdc=False):
247
238
248
239
return rd
249
240
250
- def is_submitted_to_insdc (self ):
251
- r"""Tells if the raw data has been submitted to insdc
252
-
253
- Returns
254
- -------
255
- bool
256
- True if the raw data have been submitted to insdc. False otherwise
257
- """
258
- conn_handler = SQLConnectionHandler ()
259
- return conn_handler .execute_fetchone (
260
- "SELECT submitted_to_insdc FROM qiita.{0} "
261
- "WHERE raw_data_id=%s" .format (self ._table ), [self .id ])[0 ]
262
-
263
241
@property
264
242
def studies (self ):
265
243
r"""The list of study ids to which the raw data belongs to
@@ -287,6 +265,7 @@ class PreprocessedData(BaseData):
287
265
Methods
288
266
-------
289
267
create
268
+ is_submitted_to_insdc
290
269
291
270
See Also
292
271
--------
@@ -297,16 +276,15 @@ class PreprocessedData(BaseData):
297
276
_data_filepath_table = "preprocessed_filepath"
298
277
_data_filepath_column = "preprocessed_data_id"
299
278
_study_preprocessed_table = "study_preprocessed_data"
279
+ _raw_preprocessed_table = "raw_preprocessed_data"
300
280
301
281
@classmethod
302
- def create (cls , raw_data , study , preprocessed_params_table ,
303
- preprocessed_params_id , filepaths ):
282
+ def create (cls , study , preprocessed_params_table , preprocessed_params_id ,
283
+ filepaths , raw_data = None , submitted_to_insdc = False ):
304
284
r"""Creates a new object with a new id on the storage system
305
285
306
286
Parameters
307
287
----------
308
- raw_data : RawData
309
- The RawData object used as base to this preprocessed data
310
288
study : Study
311
289
The study to which this preprocessed data belongs to
312
290
preprocessed_params_table : str
@@ -317,6 +295,10 @@ def create(cls, raw_data, study, preprocessed_params_table,
317
295
filepaths : iterable of tuples (str, int)
318
296
The list of paths to the preprocessed files and its filepath type
319
297
identifier
298
+ submitted_to_insdc : bool, optional
299
+ If true, the raw data files have been submitted to insdc
300
+ raw_data : RawData, optional
301
+ The RawData object used as base to this preprocessed data
320
302
321
303
Raises
322
304
------
@@ -333,11 +315,13 @@ def create(cls, raw_data, study, preprocessed_params_table,
333
315
# Add the preprocessed data to the database,
334
316
# and get the preprocessed data id back
335
317
ppd_id = conn_handler .execute_fetchone (
336
- "INSERT INTO qiita.{0} (raw_data_id, preprocessed_params_table, "
337
- "preprocessed_params_id) VALUES (%(raw_id)s, %(param_table)s, "
338
- "%(param_id)s) RETURNING preprocessed_data_id" .format (cls ._table ),
339
- {'raw_id' : raw_data .id , 'param_table' : preprocessed_params_table ,
340
- 'param_id' : preprocessed_params_id })[0 ]
318
+ "INSERT INTO qiita.{0} (preprocessed_params_table, "
319
+ "preprocessed_params_id, submitted_to_insdc) VALUES "
320
+ "(%(param_table)s, %(param_id)s, %(insdc)s) "
321
+ "RETURNING preprocessed_data_id" .format (cls ._table ),
322
+ {'param_table' : preprocessed_params_table ,
323
+ 'param_id' : preprocessed_params_id ,
324
+ 'insdc' : submitted_to_insdc })[0 ]
341
325
ppd = cls (ppd_id )
342
326
343
327
# Connect the preprocessed data with its study
@@ -346,6 +330,13 @@ def create(cls, raw_data, study, preprocessed_params_table,
346
330
"VALUES (%s, %s)" .format (ppd ._study_preprocessed_table ),
347
331
(study .id , ppd .id ))
348
332
333
+ if raw_data is not None :
334
+ # Connect the preprocessed data with the raw data
335
+ conn_handler .execute (
336
+ "INSERT INTO qiita.{0} (raw_data_id, preprocessed_data_id) "
337
+ "VALUES (%s, %s)" .format (cls ._raw_preprocessed_table ),
338
+ (raw_data .id , ppd_id ))
339
+
349
340
ppd ._add_filepaths (filepaths , conn_handler )
350
341
return ppd
351
342
@@ -355,7 +346,7 @@ def raw_data(self):
355
346
conn_handler = SQLConnectionHandler ()
356
347
return conn_handler .execute_fetchone (
357
348
"SELECT raw_data_id FROM qiita.{0} WHERE "
358
- "preprocessed_data_id=%s" .format (self ._table ),
349
+ "preprocessed_data_id=%s" .format (self ._raw_preprocessed_table ),
359
350
[self ._id ])[0 ]
360
351
361
352
@property
@@ -372,6 +363,19 @@ def study(self):
372
363
"preprocessed_data_id=%s" .format (self ._study_preprocessed_table ),
373
364
[self ._id ])[0 ]
374
365
366
def is_submitted_to_insdc(self):
    r"""Report the stored insdc submission flag for this object

    Reads the ``submitted_to_insdc`` column from the row of
    ``qiita.<self._table>`` whose ``preprocessed_data_id`` matches
    ``self.id``.

    Returns
    -------
    bool
        True if the data have been submitted to insdc. False otherwise
    """
    handler = SQLConnectionHandler()
    # The table name is interpolated with str.format; the id is passed
    # separately as a query parameter.
    query = ("SELECT submitted_to_insdc FROM qiita.{0} "
             "WHERE preprocessed_data_id=%s").format(self._table)
    row = handler.execute_fetchone(query, (self.id,))
    # Only one column is selected, so the flag is the first item of the row
    return row[0]
378
+
375
379
376
380
class ProcessedData (BaseData ):
377
381
r"""Object for dealing with processed data
@@ -392,15 +396,14 @@ class ProcessedData(BaseData):
392
396
_table = "processed_data"
393
397
_data_filepath_table = "processed_filepath"
394
398
_data_filepath_column = "processed_data_id"
399
+ _preprocessed_processed_table = "preprocessed_processed_data"
395
400
396
401
@classmethod
397
- def create (cls , preprocessed_data , processed_params_table ,
398
- processed_params_id , filepaths , processed_date = None ):
402
+ def create (cls , processed_params_table , processed_params_id , filepaths ,
403
+ preprocessed_data = None , processed_date = None ):
399
404
r"""
400
405
Parameters
401
406
----------
402
- preprocessed_data : PreprocessedData
403
- The PreprocessedData object used as base to this processed data
404
407
processed_params_table : str
405
408
Name of the table that holds the preprocessing parameters used
406
409
processed_params_id : int
@@ -409,6 +412,8 @@ def create(cls, preprocessed_data, processed_params_table,
409
412
filepaths : iterable of tuples (str, int)
410
413
The list of paths to the processed files and its filepath type
411
414
identifier
415
+ preprocessed_data : PreprocessedData, optional
416
+ The PreprocessedData object used as base to this processed data
412
417
processed_date : datetime, optional
413
418
Date in which the data have been processed. Default: now
414
419
@@ -432,16 +437,22 @@ def create(cls, preprocessed_data, processed_params_table,
432
437
# Add the processed data to the database,
433
438
# and get the processed data id back
434
439
pd_id = conn_handler .execute_fetchone (
435
- "INSERT INTO qiita.{0} (preprocessed_data_id, "
436
- "processed_params_table, processed_params_id, processed_date) "
437
- "VALUES (%(prep_data_id)s, %(param_table)s, %(param_id)s, "
438
- "%(date)s) RETURNING processed_data_id" .format (cls ._table ),
439
- {'prep_data_id' : preprocessed_data .id ,
440
- 'param_table' : processed_params_table ,
440
+ "INSERT INTO qiita.{0} (processed_params_table, "
441
+ "processed_params_id, processed_date) VALUES (%(param_table)s, "
442
+ "%(param_id)s, %(date)s) RETURNING "
443
+ "processed_data_id" .format (cls ._table ),
444
+ {'param_table' : processed_params_table ,
441
445
'param_id' : processed_params_id ,
442
446
'date' : processed_date })[0 ]
443
447
444
448
pd = cls (pd_id )
449
+
450
+ if preprocessed_data is not None :
451
+ conn_handler .execute (
452
+ "INSERT INTO qiita.{0} (preprocessed_data_id, "
453
+ "processed_data_id) VALUES "
454
+ "(%s, %s)" .format (cls ._preprocessed_processed_table ),
455
+ (preprocessed_data .id , pd_id ))
445
456
pd ._add_filepaths (filepaths , conn_handler )
446
457
return cls (pd_id )
447
458
@@ -451,18 +462,18 @@ def preprocessed_data(self):
451
462
conn_handler = SQLConnectionHandler ()
452
463
return conn_handler .execute_fetchone (
453
464
"SELECT preprocessed_data_id FROM qiita.{0} WHERE "
454
- "processed_data_id=%s" .format (self ._table ),
465
+ "processed_data_id=%s" .format (self ._preprocessed_processed_table ),
455
466
[self ._id ])[0 ]
456
467
457
468
@property
def data_type(self):
    r"""The data_type of the data used

    Selects the distinct ``data_type`` values reachable from this object's
    ``processed_data_id`` by joining preprocessed_processed_data,
    raw_preprocessed_data, common_prep_info and data_type, and returns the
    first column of the fetched row.
    """
    # One clause per fragment; the concatenated text is what gets executed
    query = (
        "SELECT DISTINCT DT.data_type "
        "FROM qiita.preprocessed_processed_data PPD "
        "JOIN qiita.raw_preprocessed_data RPD "
        "on PPD.preprocessed_data_id = RPD.preprocessed_data_id "
        "JOIN qiita.common_prep_info CPI "
        "ON RPD.raw_data_id = CPI.raw_data_id "
        "JOIN qiita.data_type DT "
        "ON CPI.data_type_id = DT .data_type_id "
        "WHERE PPD .processed_data_id = %s"
    )
    handler = SQLConnectionHandler()
    row = handler.execute_fetchone(query, [self._id])
    return row[0]
0 commit comments