10
10
import yaml
11
11
from couchdb import Server
12
12
import datetime
13
+ from collections import OrderedDict
13
14
14
15
# Sheet row number of the first data row for each item type.
FIRST_ROW = {
    'components': 9,
    'products': 4,
}

# Name of the workbook sheet that holds each item type.
SHEET = {
    'components': 'Price list',
    'products': 'Products',
}

# Skip columns which are calculated from the other fields
SKIP = {
    'components': ['Price', 'Total', 'Per unit'],
    'products': ['Internal', 'External'],
}

# For each item type: the name of the _id_ key, and the fields that cannot
# be changed while keeping the same _id_. If any of these fields needs to be
# updated, a new id has to be created instead.
CONSERVED_KEY_SETS = {
    'products': ('ID', ['Category', 'Type', 'Name']),
    'components': ('REF_ID', ['Category', 'Type', 'Product name']),
}

# For each item type: the name of the _id_ key, and the "columns" whose
# combined values need to be unique within the document.
UNIQUE_KEY_SETS = {
    'products': ('ID', ['Category', 'Type', 'Name']),
    'components': ('REF_ID', ['Category', 'Type', 'Product name', 'Units']),
}

# Sheet rows with this row number or higher are never read.
MAX_NR_ROWS = 200
23
35
36
# NOTE: load_products uses a row's position in the products sheet as its ID,
# so the rows of products are assumed to be sorted in the preferred order.
37
+
24
38
# Module-level logger with colored output.
logger = logging.getLogger(__name__)
# Do not hand the records on to ancestor loggers as well;
# otherwise the messages appeared twice.
logger.propagate = False
coloredlogs.install(
    level='INFO',
    logger=logger,
    fmt='%(asctime)s %(levelname)s %(message)s',
)
29
43
44
+
45
def check_unique(items, type, key_sets=None):
    """Make sure all items within _items_ fulfill the uniqueness criteria.

    The combination of values found in the columns listed for _type_ in
    UNIQUE_KEY_SETS may occur only once among the items.

    Params:
        items - A dict of the items to check (each item being a dict of
                column name -> value) with the ID attribute as the key.
        type - Either "components" or "products"
        key_sets - Optional replacement for UNIQUE_KEY_SETS with the same
                   layout: type -> (id key, list of column names).

    Returns True when no combination is repeated.
    Raises ValueError for the first combination that is found twice and
    KeyError if _type_ is unknown or an item lacks one of the columns.
    """
    if key_sets is None:
        key_sets = UNIQUE_KEY_SETS

    # The columns are the same for every item, so they are looked up once.
    # The id key stored next to them is not needed for this check.
    _, keys = key_sets[type]

    seen_combinations = set()
    for item in items.values():
        combination = tuple(item[key] for key in keys)

        # Check that it is not already added
        if combination in seen_combinations:
            raise ValueError("Key combination {}:{} is included multiple "
                             "times in the {} sheet. "
                             "ABORTING.".format(keys, combination, type))
        seen_combinations.add(combination)
    return True
62
+
63
+
64
def check_conserved(new_items, current_items, type, key_sets=None):
    """Ensures the keys in CONSERVED_KEY_SETS are conserved for each given id.

    Compares the new version against the currently active one. Items whose
    id is not present in the current version are not checked.

    Params:
        new_items - A dict of the items that are to be added
                    with ID attribute as the key.
        current_items - A dict of the items currently in the database
                        with ID attribute as the key. It is looked up
                        using the string form of the new item's id.
        type - Either "components" or "products"
        key_sets - Optional replacement for CONSERVED_KEY_SETS with the
                   same layout: type -> (id key, list of conserved keys).

    Raises ValueError for the first conserved key whose value differs
    between the new and the current item.
    """
    if key_sets is None:
        key_sets = CONSERVED_KEY_SETS
    conserved_keys = key_sets[type][1]

    for item_id, new_item in new_items.items():
        current_id = str(item_id)
        if current_id not in current_items:
            # A new id has nothing to be compared against
            continue

        current_item = current_items[current_id]
        for conserved_key in conserved_keys:
            new_value = new_item[conserved_key]
            current_value = current_item[conserved_key]
            if new_value != current_value:
                # NOTE(review): the message says "component" also when
                # type is "products"; wording left unchanged on purpose.
                raise ValueError("{} need to be conserved for {}."
                                 " Violated for component with id {}. "
                                 "Found {} for new and {} for current. "
                                 "ABORTING!".format(
                                     conserved_key,
                                     type,
                                     item_id,
                                     new_value,
                                     current_value
                                 ))
91
+
92
+
93
def get_current_items(db, type):
    """Return the items stored in the first row of the by_version view.

    The "entire_document/by_version" view is queried in descending order
    with a limit of one row (presumably giving the document with the
    highest version), and the _type_ entry of that row's value is returned.

    Params:
        db - The database to query. It needs a ``view`` method whose
             result exposes the matching rows as ``rows``.
        type - Either "components" or "products"

    Returns an empty dict when the view does not contain any rows.
    Raises KeyError if the document found has no _type_ entry.
    """
    rows = db.view("entire_document/by_version", descending=True, limit=1).rows
    if not rows:
        # Nothing has been stored yet
        return {}
    latest_doc = rows[0].value
    return latest_doc[type]
99
+
100
+
30
101
def is_empty_row(comp):
    """Return True when every value in _comp_ is the empty string.

    Only the values are inspected, the keys do not matter. A dict without
    any entries counts as empty as well.
    """
    return not any(value != '' for value in comp.values())
35
106
107
+
108
def load_products(wb):
    """Read the products sheet of the workbook _wb_.

    The header is taken from the row directly above the first data row.
    Columns whose header is listed in SKIP['products'] are left out and
    empty cells are stored as ''. The value of the 'Components' column is
    turned into a list of ints. Rows from the first data row up to, but not
    including, MAX_NR_ROWS are read and rows without any content are dropped.

    Returns an OrderedDict in sheet order, mapping the position of the row
    (counted from 1 at the first data row) to a dict of
    header -> cell value.
    """
    sheet = wb[SHEET['products']]
    first_row = FIRST_ROW['products']
    header_row = first_row - 1

    # Column letter(s) -> header value, for every column that is kept
    columns = {}
    for header_cell in sheet[header_row]:
        title = header_cell.value
        if title in SKIP['products']:
            continue
        # Get cell column as string by dropping the row number
        column = header_cell.coordinate.replace(str(header_row), '')
        columns[column] = title

    products = OrderedDict()
    # Unknown number of rows, so every row below the limit is inspected
    for row in range(first_row, MAX_NR_ROWS):
        product = {}
        for column, title in columns.items():
            value = sheet["{}{}".format(column, row)].value
            if value is None:
                value = ''
            if title == 'Components':
                # Some cells might be interpreted as floats
                # e.g. "37,78"
                value = str(value).replace('.', ',')
                if value:
                    # Make a list with all individual components
                    value = [int(ref_id) for ref_id in value.split(',')]
            product[title] = value

        if is_empty_row(product):
            continue
        # the row in the sheet is used as ID.
        # In the future this will have to be backpropagated to the sheet.
        products[row - first_row + 1] = product

    return products
149
+
150
+
36
151
def load_components (wb ):
37
152
ws = wb [SHEET ['components' ]]
38
153
@@ -44,7 +159,7 @@ def load_components(wb):
44
159
for cell in header_cells :
45
160
cell_val = cell .value
46
161
if cell_val == 'ID' :
47
- cell_val = 'REF_ID' # Don't want to confuse it with couchdb ids
162
+ cell_val = 'REF_ID' # Don't want to confuse it with couchdb ids
48
163
if cell_val not in SKIP ['components' ]:
49
164
# Get cell column as string
50
165
cell_column = cell .coordinate .replace (str (header_row ), '' )
@@ -54,50 +169,111 @@ def load_components(wb):
54
169
while row < MAX_NR_ROWS :
55
170
new_component = {}
56
171
for col , header_val in header .items ():
57
- val = ws ["{}{}" .format (col ,row )].value
172
+ val = ws ["{}{}" .format (col , row )].value
58
173
if val is None :
59
174
val = ''
60
175
new_component [header_val ] = val
61
176
177
+ if new_component ['REF_ID' ] in components :
178
+ # Violates the uniqueness of the ID
179
+ raise ValueError ("ID {} is included multiple "
180
+ "times in the {} sheet. "
181
+ "ABORTING." .format (new_component ['REF_ID' ], type ))
182
+
62
183
if not is_empty_row (new_component ):
63
184
components [new_component ['REF_ID' ]] = new_component
64
185
row += 1
65
186
66
187
return components
67
188
68
189
69
- def main (input_sheet , config , user , user_email ,
190
def get_current_version(db):
    """Return the 'Version' of the first row of the by_version view as an int.

    The "entire_document/by_version" view of _db_ is queried in descending
    order with a limit of one row. When the view has no rows, 0 is returned.
    """
    latest_rows = db.view('entire_document/by_version', limit=1,
                          descending=True).rows
    if not latest_rows:
        return 0
    return int(latest_rows[0].value['Version'])
197
+
198
+
199
def _publish_items(db, item_type, items, user, user_email, push):
    """Validate _items_ and save (or print) them as the next document version.

    Params:
        db - The database the new document belongs to.
        item_type - Either "components" or "products"; also used as the
                    key under which the items are stored in the document.
        items - A dict of the items with ID attribute as the key.
        user - Stored as 'Issued by user'.
        user_email - Stored as 'Issued by user email'.
        push - Save the document to _db_ if true, otherwise print it.

    Raises ValueError if the uniqueness or conservation checks fail.
    """
    check_unique(items, item_type)

    current_items = get_current_items(db, item_type)
    # Otherwise the first version, with nothing to compare against
    if current_items:
        check_conserved(items, current_items, item_type)

    doc = {}
    doc[item_type] = items
    doc['Issued by user'] = user
    doc['Issued by user email'] = user_email
    doc['Issued at'] = datetime.datetime.now().isoformat()
    doc['Version'] = get_current_version(db) + 1

    if push:
        logger.info(
            'Pushing {} document version {}'.format(item_type, doc['Version'])
        )
        db.save(doc)
    else:
        print(doc)


def main(input_file, config, user, user_email,
         add_components=False, add_products=False, push=False):
    """Load pricing data from an excel file and publish it as new versions.

    Params:
        input_file - Path of the excel file to read.
        config - Path of a YAML settings file; its "couch_server" entry
                 is passed on to the couchdb Server.
        user - Stored in the documents as 'Issued by user'.
        user_email - Stored in the documents as 'Issued by user email'.
        add_components - Process the components sheet and target the
                         'pricing_components' database.
        add_products - Process the products sheet and target the
                       'pricing_products' database.
        push - Save the new documents if true, otherwise only print them.
    """
    with open(config) as settings_file:
        # yaml.load() without an explicit Loader can construct arbitrary
        # objects and is rejected by PyYAML 6 and later; the settings file
        # only needs plain YAML, which safe_load handles.
        server_settings = yaml.safe_load(settings_file)
    couch = Server(server_settings.get("couch_server", None))

    wb = load_workbook(input_file, read_only=True, data_only=True)

    if add_components:
        db = couch['pricing_components']
        components = load_components(wb)
        _publish_items(db, 'components', components, user, user_email, push)

    if add_products:
        db = couch['pricing_products']
        products = load_products(wb)
        _publish_items(db, 'products', products, user, user_email, push)
90
263
91
264
92
265
if __name__ == '__main__' :
93
266
parser = argparse .ArgumentParser (description = __doc__ )
94
- parser .add_argument ('pricing_excel_sheet ' ,
95
- help = "The excel sheet currently used for pricing" )
267
+ parser .add_argument ('pricing_excel_file ' ,
268
+ help = "The excel file currently used for pricing" )
96
269
parser .add_argument ('--statusdb_config' , required = True ,
97
270
help = 'The genomics-status settings.yaml file.' )
98
271
parser .add_argument ('--components' , action = 'store_true' ,
99
272
help = 'Add the pricing components '
100
273
'from the "Price list" sheet.' )
274
+ parser .add_argument ('--products' , action = 'store_true' ,
275
+ help = 'Add the pricing products '
276
+ 'from the sheet.' )
101
277
parser .add_argument ('--push' , action = 'store_true' ,
102
278
help = 'Use this tag to actually push to the databse,'
103
279
' otherwise it is just dryrun' )
@@ -107,5 +283,6 @@ def main(input_sheet, config, user, user_email,
107
283
help = 'Email used to tell who changed the document' )
108
284
args = parser .parse_args ()
109
285
110
- main (args .pricing_excel_sheet , args .statusdb_config , args .user ,
111
- args .user_email , add_components = args .components , push = args .push )
286
+ main (args .pricing_excel_file , args .statusdb_config , args .user ,
287
+ args .user_email , add_components = args .components ,
288
+ add_products = args .products , push = args .push )
0 commit comments