@@ -3728,6 +3728,8 @@ def _get_metagenomics_data_for_prep(self):
37283728 primersetcp2.barcode_seq AS barcode_i7,
37293729 primersetcp.primer_set_id AS primer_set_id_i5,
37303730 primersetcp2.primer_set_id AS primer_set_id_i7,
3731+ primersetcp.external_id AS i5_index_id,
3732+ primersetcp2.external_id AS i7_index_id,
37313733 primersetplate.external_id AS primer_plate_i5,
37323734 primersetplate2.external_id AS primer_plate_i7,
37333735 primerworkingplateprpr.run_date AS primer_date_i5,
@@ -3855,6 +3857,43 @@ def _get_metagenomics_data_for_prep(self):
38553857 # query.
38563858 d ['instrument_model' ] = inst_mdl
38573859
3860+ # note that the correct term is 'Kapa', not 'kappa'.
3861+ id = d ['kappa_hyper_plus_kit_id' ]
3862+ d ['kapa_hyper_plus_kit_lot' ] = reagent [id ]['external_lot_id' ]
3863+
3864+ id = d ['stub_lot_id' ]
3865+ d ['stub_lot_id' ] = reagent [id ]['external_lot_id' ]
3866+
3867+ # We have two robot IDs (epMotion and KingFisher). It is unclear
3868+ # which one is rightfully the 'extraction robot', but the example
3869+ # value is both external IDs joined by an underscore, so we
3870+ # tentatively use that combined value.
3871+ id = d ['gepmotion_robot_id' ]
3872+ epm_robot = equipment [id ]['external_id' ]
3873+ id = d ['kingfisher_robot_id' ]
3874+ kf_robot = equipment [id ]['external_id' ]
3875+ d ['extraction_robot' ] = '%s_%s' % (epm_robot , kf_robot )
3876+
3877+ # Note extraction_kit_id references (as in foreign-key)
3878+ # reagent_composition(reagent_composition_id).
3879+ id = d ['extraction_kit_id' ]
3880+ d ['extraction_kit_lot' ] = reagent [id ]['external_lot_id' ]
3881+
3882+ id = d ['epmotion_tool_id' ]
3883+ d ['epmotion_tool_name' ] = equipment [id ]['external_id' ]
3884+
3885+ # For now, platform is hard-coded to 'Illumina'. This will
3886+ # need to change once Nanopore is supported by LC and we
3887+ # have a column to record one or the other.
3888+ d ['platform' ] = 'Illumina'
3889+
3890+ #LIBRARY_CONSTRUCTION_PROTOCOL
3891+ # these key/value pairs are tentatively hard-coded for now.
3892+ # TODO: Awaiting response from team.
3893+ d ['sequencing_method' ] = 'sequencing by synthesis'
3894+ d ['run_center' ] = 'UCSDMI'
3895+ d ['library_construction_protocol' ] = 'KL KHP'
3896+
38583897 return results
38593898
38603899 def _generate_metagenomics_prep_information (self ):
@@ -3889,37 +3928,6 @@ def _generate_metagenomics_prep_information(self):
38893928 ***20 'study_id': None
38903929 ***26 'sample_id': None
38913930 """
3892-
3893- '''
3894- # SAMPLE
3895- 1 'content': '1.SKB1.640202.Test.plate.4.A6',
3896- 2 'is_control': False,
3897- 3 'primer_plate_i7': 'iTru 7 primer',
3898- 4 'normalization_process_id': 1,
3899- 5 'experiment_design_description': 'Analysis ... Plant Microbiome',
3900- 6 'barcode_i7': 'GTTCTCGT',
3901- 7 'primer_date_i5': '2017-10-23T19:20:25',
3902- 8 'primer_plate_i5': 'iTru 5 primer',
3903- 9 'primer_set_id_i5': 2,
3904- 10 'extraction_kit_id': 2,
3905- 11 'epmotion_tool_id': 15,
3906- 12 'well_id': 'A6',
3907- 13 'gepmotion_robot_id': 5,
3908- 14 'project_name': 'Cannabis Soils',
3909- 15 'orig_name': '1.SKB1.640202',
3910- 16 'kingfisher_robot_id': 11,
3911- 17 'sample_plate': 'Test plate 4',
3912- 18 'col_num': 6,
3913- 19 'plating': 'test@foo.bar',
3914- 20 'study_id': 1,
3915- 21 'row_num': 1,
3916- 22 'primer_date_i7': '2017-10-23T19:20:25',
3917- 23 'primer_set_id_i7': 2,
3918- 24 'stub_lot_id': 6,
3919- 25 'kappa_hyper_plus_kit_id': 5,
3920- 26 'sample_id': '1.SKB1.640202',
3921- 27 'barcode_i5': 'GAAGATCC'
3922- '''
39233931 results = self ._get_metagenomics_data_for_prep ()
39243932
39253933 data = {}
@@ -3959,60 +3967,52 @@ def _generate_metagenomics_prep_information(self):
39593967 for prep_sheet_id , prep_sheet in data .items ():
39603968 prep_sheet = pd .DataFrame .from_dict (prep_sheet , orient = 'index' )
39613969
3962- # an example of renaming a key/column before output
3970+ # mapping keys to expected names for columns in the final output
39633971 mv = {"orig_name" : "Orig_name" ,
39643972 "well_id" : "Well_ID" ,
39653973 "sample_plate" : "Sample_Plate" ,
39663974 "project_name" : "Project_name" ,
3967- "plating" : "Plating" }
3975+ "plating" : "Plating" ,
3976+ "barcode_i7" : "index" ,
3977+ "barcode_i5" : "index2" ,
3978+ "primer_plate_i7" : "i7_Primer_Plate" ,
3979+ "primer_plate_i5" : "i5_Primer_Plate" ,
3980+ "primer_date_i7" : "i7_Primer_date" ,
3981+ "primer_date_i5" : "i5_Primer_date" ,
3982+ "experiment_design_description" :
3983+ "EXPERIMENT_DESIGN_DESCRIPTION" ,
3984+ "instrument_model" : "INSTRUMENT_MODEL" ,
3985+ "kapa_hyper_plus_kit_lot" : "KapaHyperPlusKit_lot" ,
3986+ "stub_lot_id" : "Stub_lot" ,
3987+ "platform" : "PLATFORM" ,
3988+ "sequencing_method" : "sequencing_meth" ,
3989+ "run_center" : "RUN_CENTER" ,
3990+ "extraction_robot" : "Extraction_robot" ,
3991+ "extraction_kit_lot" : "ExtractionKit_lot" ,
3992+ "epmotion_tool_name" : "TM1000_8_tool" ,
3993+ "i5_index_id" : "i5_Index_ID" ,
3994+ "i7_index_id" : "i7_Index_ID" ,
3995+ "library_construction_protocol" :
3996+ "LIBRARY_CONSTRUCTION_PROTOCOL" }
39683997 prep_sheet = prep_sheet .rename (columns = mv )
39693998
3970- def generate_well_description ( row ):
3971- return
3972-
3973- # Copy columns
3999+ # Synthesize new columns
4000+ # Note: these could also be performed in
4001+ # _get_metagenomics_data_for_prep() before returning the
4002+ # dictionary.
39744003 prep_sheet ['Sample_ID' ] = prep_sheet ['Orig_name' ]
3975- #prep_sheet['Well_description'] = prep_sheet['Orig_name']
3976-
3977- # well description is very beta
3978- prep_sheet ['Well_description' ] = ['%s_%s_%s' % (x .Sample_Plate , i , x .Well_ID ) for i , x in prep_sheet .iterrows ()]
3979-
3980- # Alter columns
39814004 # TODO: May need replacing w/proper method (see SpreadSheet)
3982- prep_sheet ['Sample_ID' ].replace (regex = True ,inplace = True ,to_replace = r'^\d+\.' ,value = r'' )
3983-
3984- # Add empty columns to test output
3985- prep_sheet ['EXPERIMENT_DESIGN_DESCRIPTION' ] = None
3986- prep_sheet ['ExtractionKit_lot' ] = None
3987- prep_sheet ['Extraction_robot' ] = None
3988- prep_sheet ['I5_Index_ID' ] = None
3989- prep_sheet ['INSTRUMENT_MODEL' ] = None
3990- prep_sheet ['KappaHyperPlusKit_lot' ] = None
3991- prep_sheet ['LIBRARY_CONSTRUCTION_PROTOCOL' ] = None
3992- prep_sheet ['Lane' ] = None
3993- prep_sheet ['PLATFORM' ] = None
3994- prep_sheet ['RUN_CENTER' ] = None
3995- prep_sheet ['RUN_DATE' ] = None
3996- prep_sheet ['RUN_PREFIX' ] = None
3997- prep_sheet ['Stub_lot' ] = None
3998- prep_sheet ['TM1000_8_tool' ] = None
3999- prep_sheet ['center_name' ] = None
4000- prep_sheet ['center_project_name' ] = None
4001- prep_sheet ['forward_read' ] = None
4002- prep_sheet ['i5_Primer_Plate' ] = None
4003- prep_sheet ['i5_Primer_date' ] = None
4004- prep_sheet ['i7_Index_ID' ] = None
4005- prep_sheet ['i7_Primer_Plate' ] = None
4006- prep_sheet ['i7_Primer_date' ] = None
4007- prep_sheet ['index' ] = None
4008- prep_sheet ['index2' ] = None
4009- prep_sheet ['reverse_read' ] = None
4010- #prep_sheet['sample_name'] = None
4011- prep_sheet ['sequencing_meth' ] = None
4005+ prep_sheet ['Sample_ID' ].replace (regex = True ,
4006+ inplace = True ,
4007+ to_replace = r'^\d+\.' ,
4008+ value = r'' )
4009+
4010+ prep_sheet ['Well_description' ] = \
4011+ ['%s_%s_%s' % (x .Sample_Plate , i , x .Well_ID )
4012+ for i , x in prep_sheet .iterrows ()]
40124013
40134014 # re-order columns, keeping only what is needed
40144015 order = [
4015- #'sample_name',
40164016 'Sample_ID' ,
40174017 'Orig_name' ,
40184018 'Well_ID' ,
@@ -4023,13 +4023,13 @@ def generate_well_description(row):
40234023 'ExtractionKit_lot' ,
40244024 'Extraction_robot' ,
40254025 'TM1000_8_tool' ,
4026- 'KappaHyperPlusKit_lot ' ,
4026+ 'KapaHyperPlusKit_lot ' ,
40274027 'Stub_lot' ,
40284028 'i7_Index_ID' ,
40294029 'index' ,
40304030 'i7_Primer_Plate' ,
40314031 'i7_Primer_date' ,
4032- 'I5_Index_ID ' ,
4032+ 'i5_Index_ID ' ,
40334033 'index2' ,
40344034 'i5_Primer_Plate' ,
40354035 'i5_Primer_date' ,
@@ -4047,9 +4047,22 @@ def generate_well_description(row):
40474047 'forward_read' ,
40484048 'reverse_read' ]
40494049
4050+ # These columns are to be supplied blank
4051+ prep_sheet ['RUN_DATE' ] = None
4052+ prep_sheet ['RUN_PREFIX' ] = None
4053+ prep_sheet ['Lane' ] = None
4054+ prep_sheet ['forward_read' ] = None
4055+ prep_sheet ['reverse_read' ] = None
4056+ prep_sheet ['center_name' ] = None
4057+ prep_sheet ['center_project_name' ] = None
4058+
40504059 prep_sheet = prep_sheet [order ]
40514060
4061+ # write out the DataFrame to TSV format
40524062 o = StringIO ()
4063+
4064+ # Note: index_label names the index column 'sample_name'; this
4065+ # is how the required 'sample_name' column reaches the final TSV.
40534066 prep_sheet .to_csv (o , sep = '\t ' , index_label = 'sample_name' )
40544067 data [prep_sheet_id ] = o .getvalue ()
40554068
@@ -4059,4 +4072,5 @@ def generate_well_description(row):
40594072
40604073 # DEBUG: Identify All-NULL columns
40614074 print (prep_sheet .info ())
4075+
40624076 return data
0 commit comments