Skip to content

Commit b4c94c3

Browse files
Charles Cowart
authored and committed
All columns are present and filled out.
All columns are present and filled out. There are some partial null columns where rows represent control data; this will be resolved in a later push.
1 parent e1413db commit b4c94c3

File tree

1 file changed

+90
-76
lines changed

1 file changed

+90
-76
lines changed

labcontrol/db/process.py

Lines changed: 90 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -3728,6 +3728,8 @@ def _get_metagenomics_data_for_prep(self):
37283728
primersetcp2.barcode_seq AS barcode_i7,
37293729
primersetcp.primer_set_id AS primer_set_id_i5,
37303730
primersetcp2.primer_set_id AS primer_set_id_i7,
3731+
primersetcp.external_id AS i5_index_id,
3732+
primersetcp2.external_id AS i7_index_id,
37313733
primersetplate.external_id AS primer_plate_i5,
37323734
primersetplate2.external_id AS primer_plate_i7,
37333735
primerworkingplateprpr.run_date AS primer_date_i5,
@@ -3855,6 +3857,43 @@ def _get_metagenomics_data_for_prep(self):
38553857
# query.
38563858
d['instrument_model'] = inst_mdl
38573859

3860+
# note that the correct term is 'Kapa', not 'kappa'.
3861+
id = d['kappa_hyper_plus_kit_id']
3862+
d['kapa_hyper_plus_kit_lot'] = reagent[id]['external_lot_id']
3863+
3864+
id = d['stub_lot_id']
3865+
d['stub_lot_id'] = reagent[id]['external_lot_id']
3866+
3867+
# We have two robot IDs. Not sure which one is rightfully the
3868+
# 'extraction robot', but the example value is both strings
3869+
# separated by an underscore. Tentatively using this combined
3870+
# value.
3871+
id = d['gepmotion_robot_id']
3872+
epm_robot = equipment[id]['external_id']
3873+
id = d['kingfisher_robot_id']
3874+
kf_robot = equipment[id]['external_id']
3875+
d['extraction_robot'] = '%s_%s' % (epm_robot, kf_robot)
3876+
3877+
# Note extraction_kit_id references (as in foreign-key)
3878+
# reagent_composition(reagent_composition_id).
3879+
id = d['extraction_kit_id']
3880+
d['extraction_kit_lot'] = reagent[id]['external_lot_id']
3881+
3882+
id = d['epmotion_tool_id']
3883+
d['epmotion_tool_name'] = equipment[id]['external_id']
3884+
3885+
# for now, platform is hard-coded to 'Illumina'
3886+
# will need to change once Nanopore is supported by LC
3887+
# and we have a column to record one or the other.
3888+
d['platform'] = 'Illumina'
3889+
3890+
#LIBRARY_CONSTRUCTION_PROTOCOL
3891+
# these key/value pairs are tentatively hard-coded for now.
3892+
# TODO: Awaiting response from team.
3893+
d['sequencing_method'] = 'sequencing by synthesis'
3894+
d['run_center'] = 'UCSDMI'
3895+
d['library_construction_protocol'] = 'KL KHP'
3896+
38583897
return results
38593898

38603899
def _generate_metagenomics_prep_information(self):
@@ -3889,37 +3928,6 @@ def _generate_metagenomics_prep_information(self):
38893928
***20 'study_id': None
38903929
***26 'sample_id': None
38913930
"""
3892-
3893-
'''
3894-
# SAMPLE
3895-
1 'content': '1.SKB1.640202.Test.plate.4.A6',
3896-
2 'is_control': False,
3897-
3 'primer_plate_i7': 'iTru 7 primer',
3898-
4 'normalization_process_id': 1,
3899-
5 'experiment_design_description': 'Analysis ... Plant Microbiome',
3900-
6 'barcode_i7': 'GTTCTCGT',
3901-
7 'primer_date_i5': '2017-10-23T19:20:25',
3902-
8 'primer_plate_i5': 'iTru 5 primer',
3903-
9 'primer_set_id_i5': 2,
3904-
10 'extraction_kit_id': 2,
3905-
11 'epmotion_tool_id': 15,
3906-
12 'well_id': 'A6',
3907-
13 'gepmotion_robot_id': 5,
3908-
14 'project_name': 'Cannabis Soils',
3909-
15 'orig_name': '1.SKB1.640202',
3910-
16 'kingfisher_robot_id': 11,
3911-
17 'sample_plate': 'Test plate 4',
3912-
18 'col_num': 6,
3913-
19 'plating': 'test@foo.bar',
3914-
20 'study_id': 1,
3915-
21 'row_num': 1,
3916-
22 'primer_date_i7': '2017-10-23T19:20:25',
3917-
23 'primer_set_id_i7': 2,
3918-
24 'stub_lot_id': 6,
3919-
25 'kappa_hyper_plus_kit_id': 5,
3920-
26 'sample_id': '1.SKB1.640202',
3921-
27 'barcode_i5': 'GAAGATCC'
3922-
'''
39233931
results = self._get_metagenomics_data_for_prep()
39243932

39253933
data = {}
@@ -3959,60 +3967,52 @@ def _generate_metagenomics_prep_information(self):
39593967
for prep_sheet_id, prep_sheet in data.items():
39603968
prep_sheet = pd.DataFrame.from_dict(prep_sheet, orient='index')
39613969

3962-
# an example of renaming a key/column before output
3970+
# mapping keys to expected names for columns in the final output
39633971
mv = {"orig_name": "Orig_name",
39643972
"well_id": "Well_ID",
39653973
"sample_plate": "Sample_Plate",
39663974
"project_name": "Project_name",
3967-
"plating": "Plating"}
3975+
"plating": "Plating",
3976+
"barcode_i7": "index",
3977+
"barcode_i5": "index2",
3978+
"primer_plate_i7": "i7_Primer_Plate",
3979+
"primer_plate_i5": "i5_Primer_Plate",
3980+
"primer_date_i7": "i7_Primer_date",
3981+
"primer_date_i5": "i5_Primer_date",
3982+
"experiment_design_description":
3983+
"EXPERIMENT_DESIGN_DESCRIPTION",
3984+
"instrument_model": "INSTRUMENT_MODEL",
3985+
"kapa_hyper_plus_kit_lot": "KapaHyperPlusKit_lot",
3986+
"stub_lot_id": "Stub_lot",
3987+
"platform": "PLATFORM",
3988+
"sequencing_method": "sequencing_meth",
3989+
"run_center": "RUN_CENTER",
3990+
"extraction_robot": "Extraction_robot",
3991+
"extraction_kit_lot": "ExtractionKit_lot",
3992+
"epmotion_tool_name": "TM1000_8_tool",
3993+
"i5_index_id": "i5_Index_ID",
3994+
"i7_index_id": "i7_Index_ID",
3995+
"library_construction_protocol":
3996+
"LIBRARY_CONSTRUCTION_PROTOCOL"}
39683997
prep_sheet = prep_sheet.rename(columns=mv)
39693998

3970-
def generate_well_description(row):
3971-
return
3972-
3973-
# Copy columns
3999+
# Synthesize new columns
4000+
# Note: these could also be performed in
4001+
# _get_metagenomics_data_for_prep() before returning the
4002+
# dictionary.
39744003
prep_sheet['Sample_ID'] = prep_sheet['Orig_name']
3975-
#prep_sheet['Well_description'] = prep_sheet['Orig_name']
3976-
3977-
# well description is very beta
3978-
prep_sheet['Well_description'] = ['%s_%s_%s' % (x.Sample_Plate, i, x.Well_ID) for i, x in prep_sheet.iterrows()]
3979-
3980-
# Alter columns
39814004
# TODO: May need replacing w/proper method (see SpreadSheet)
3982-
prep_sheet['Sample_ID'].replace(regex=True,inplace=True,to_replace=r'^\d+\.',value=r'')
3983-
3984-
# Add empty columns to test output
3985-
prep_sheet['EXPERIMENT_DESIGN_DESCRIPTION'] = None
3986-
prep_sheet['ExtractionKit_lot'] = None
3987-
prep_sheet['Extraction_robot'] = None
3988-
prep_sheet['I5_Index_ID'] = None
3989-
prep_sheet['INSTRUMENT_MODEL'] = None
3990-
prep_sheet['KappaHyperPlusKit_lot'] = None
3991-
prep_sheet['LIBRARY_CONSTRUCTION_PROTOCOL'] = None
3992-
prep_sheet['Lane'] = None
3993-
prep_sheet['PLATFORM'] = None
3994-
prep_sheet['RUN_CENTER'] = None
3995-
prep_sheet['RUN_DATE'] = None
3996-
prep_sheet['RUN_PREFIX'] = None
3997-
prep_sheet['Stub_lot'] = None
3998-
prep_sheet['TM1000_8_tool'] = None
3999-
prep_sheet['center_name'] = None
4000-
prep_sheet['center_project_name'] = None
4001-
prep_sheet['forward_read'] = None
4002-
prep_sheet['i5_Primer_Plate'] = None
4003-
prep_sheet['i5_Primer_date'] = None
4004-
prep_sheet['i7_Index_ID'] = None
4005-
prep_sheet['i7_Primer_Plate'] = None
4006-
prep_sheet['i7_Primer_date'] = None
4007-
prep_sheet['index'] = None
4008-
prep_sheet['index2'] = None
4009-
prep_sheet['reverse_read'] = None
4010-
#prep_sheet['sample_name'] = None
4011-
prep_sheet['sequencing_meth'] = None
4005+
prep_sheet['Sample_ID'].replace(regex=True,
4006+
inplace=True,
4007+
to_replace=r'^\d+\.',
4008+
value=r'')
4009+
4010+
prep_sheet['Well_description'] =\
4011+
['%s_%s_%s' % (x.Sample_Plate, i, x.Well_ID)
4012+
for i, x in prep_sheet.iterrows()]
40124013

40134014
# re-order columns, keeping only what is needed
40144015
order = [
4015-
#'sample_name',
40164016
'Sample_ID',
40174017
'Orig_name',
40184018
'Well_ID',
@@ -4023,13 +4023,13 @@ def generate_well_description(row):
40234023
'ExtractionKit_lot',
40244024
'Extraction_robot',
40254025
'TM1000_8_tool',
4026-
'KappaHyperPlusKit_lot',
4026+
'KapaHyperPlusKit_lot',
40274027
'Stub_lot',
40284028
'i7_Index_ID',
40294029
'index',
40304030
'i7_Primer_Plate',
40314031
'i7_Primer_date',
4032-
'I5_Index_ID',
4032+
'i5_Index_ID',
40334033
'index2',
40344034
'i5_Primer_Plate',
40354035
'i5_Primer_date',
@@ -4047,9 +4047,22 @@ def generate_well_description(row):
40474047
'forward_read',
40484048
'reverse_read']
40494049

4050+
# These columns are to be supplied blank
4051+
prep_sheet['RUN_DATE'] = None
4052+
prep_sheet['RUN_PREFIX'] = None
4053+
prep_sheet['Lane'] = None
4054+
prep_sheet['forward_read'] = None
4055+
prep_sheet['reverse_read'] = None
4056+
prep_sheet['center_name'] = None
4057+
prep_sheet['center_project_name'] = None
4058+
40504059
prep_sheet = prep_sheet[order]
40514060

4061+
# write out the DataFrame to TSV format
40524062
o = StringIO()
4063+
4064+
# Note: this is how the required 'sample_name' column is added to
4065+
# the final output TSV as well.
40534066
prep_sheet.to_csv(o, sep='\t', index_label='sample_name')
40544067
data[prep_sheet_id] = o.getvalue()
40554068

@@ -4059,4 +4072,5 @@ def generate_well_description(row):
40594072

40604073
# DEBUG: Identify All-NULL columns
40614074
print(prep_sheet.info())
4075+
40624076
return data

0 commit comments

Comments
 (0)