Skip to content

Commit 7d411af

Browse files
committed
Added a lot of validation
1 parent cfc1c89 commit 7d411af

File tree

1 file changed

+188
-11
lines changed

1 file changed

+188
-11
lines changed

push_new_pricing_to_statusdb.py

+188-11
Original file line numberDiff line numberDiff line change
@@ -10,29 +10,144 @@
1010
import yaml
1111
from couchdb import Server
1212
import datetime
13+
from collections import OrderedDict
1314

1415
# Sheet row number of the first data row for each kind of item.
# load_products reads the column headers from the row directly above it.
FIRST_ROW = {'components': 9,
             'products': 4}

# Name of the workbook sheet that holds each kind of item.
SHEET = {'components': 'Price list',
         'products': 'Products'}

# Skip columns which are calculated from the other fields
SKIP = {'components': ['Price', 'Total', 'Per unit'],
        'products': ['Internal', 'External']}

# The name of the _id_ key and which variables that cannot be changed
# while keeping the same _id_. If an update of any of these fields is needed,
# a new id needs to be created.
CONSERVED_KEY_SETS = {'products': ('ID', ['Category', 'Type', 'Name']),
                      'components': ('REF_ID', ['Category', 'Type', 'Product name'])}

# The combination of these "columns" needs to be unique within the document
UNIQUE_KEY_SETS = {'products': ('ID', ['Category', 'Type', 'Name']),
                   'components': ('REF_ID', ['Category', 'Type', 'Product name', 'Units'])}

# Upper bound (exclusive) on the sheet row numbers that are scanned, since
# the number of populated rows is not known in advance.
MAX_NR_ROWS = 200
2335

36+
# NOTE: load_products derives each product id from its row position, so the
# rows of the products sheet are assumed to be sorted in the preferred order.
37+
2438
# Set up a module-level logger with colored output
logger = logging.getLogger(__name__)
logger.propagate = False  # Otherwise the messages appeared twice
# Attach the colored handler to this logger only, showing INFO and above
coloredlogs.install(level='INFO', logger=logger,
                    fmt='%(asctime)s %(levelname)s %(message)s')
2943

44+
45+
def check_unique(items, type):
    """Make sure all items within _items_ fulfill the uniqueness criteria.

    The columns whose combined values have to be unique are taken from
    UNIQUE_KEY_SETS[type].

    Params:
        items - A dict of items (each a dict of column name -> value).
                Only the values of the outer dict are inspected.
        type - Either "components" or "products"

    Returns:
        True when no combination of values occurs more than once.

    Raises:
        ValueError - when two items share the same combination of values.
        KeyError - when type is not in UNIQUE_KEY_SETS or an item lacks
                   one of the required columns.
    """
    # `type` shadows the builtin but is part of the function's interface.
    # The key set does not depend on the item, so look it up only once.
    _, keys = UNIQUE_KEY_SETS[type]

    seen = set()
    for item in items.values():
        combination = tuple(item[key] for key in keys)

        # Check that it is not already added
        if combination in seen:
            raise ValueError("Key combination {}:{} is included multiple "
                             "times in the {} sheet. "
                             "ABORTING.".format(keys, combination, type))
        seen.add(combination)
    return True
62+
63+
64+
def check_conserved(new_items, current_items, type):
    """Ensures the keys in CONSERVED_KEY_SETS are conserved for each given id.

    Compares the new version against the currently active one. Ids that are
    not present in current_items are new and therefore not checked.

    Params:
        new_items - A dict of the items that are to be added
                    with ID attribute as the key.
        current_items - A dict of the items currently in the database
                        with ID attribute as the key. Looked up using the
                        string form of each new id.
        type - Either "components" or "products"

    Raises:
        ValueError - when a conserved field differs between the new and the
                     current item with the same id.
        KeyError - when type is not in CONSERVED_KEY_SETS or an item lacks
                   one of the conserved fields.
    """
    conserved_keys = CONSERVED_KEY_SETS[type][1]

    for item_id, new_item in new_items.items():
        lookup_id = str(item_id)
        if lookup_id not in current_items:
            # A new id has nothing to be compared against
            continue

        current_item = current_items[lookup_id]
        for conserved_key in conserved_keys:
            new_val = new_item[conserved_key]
            current_val = current_item[conserved_key]
            if new_val != current_val:
                # The message says "item" since both components and
                # products are validated by this function.
                raise ValueError("{} need to be conserved for {}."
                                 " Violated for item with id {}. "
                                 "Found {} for new and {} for current. "
                                 "ABORTING!".format(
                                     conserved_key,
                                     type,
                                     item_id,
                                     new_val,
                                     current_val
                                 ))
91+
92+
93+
def get_current_items(db, type):
    """Fetch the items of the given type from the most recent document.

    Queries the "entire_document/by_version" view in descending order and
    only looks at its first row (presumably the highest version).

    Params:
        db - The database to query, exposing a couchdb style view() method.
        type - The key of the document to return,
               e.g. "components" or "products"

    Returns:
        The value stored under _type_ in that document,
        or an empty dict when the view has no rows.
    """
    rows = db.view("entire_document/by_version", descending=True, limit=1).rows
    if not rows:
        # Nothing has been stored yet
        return {}
    return rows[0].value[type]
99+
100+
30101
def is_empty_row(comp):
    """Tell whether a row holds no data at all.

    Params:
        comp - A dict of column name -> value for a single row.

    Returns:
        True if every value equals the empty string (or the dict is empty),
        False as soon as any value differs from ''.
    """
    # Only the values matter; the column names are irrelevant here
    return not any(value != '' for value in comp.values())
35106

107+
108+
def load_products(wb):
    """Read all products from the products sheet of the workbook.

    The header row is the row directly above FIRST_ROW['products']. Columns
    listed in SKIP['products'], as well as columns without a header value,
    are ignored. Rows are read from FIRST_ROW['products'] up to (but not
    including) row MAX_NR_ROWS, and rows where every read cell is empty are
    left out.

    Params:
        wb - The workbook, indexable by sheet name.

    Returns:
        An OrderedDict in sheet order, mapping product id (the 1-based
        position of the row counted from the first data row) to a dict of
        header name -> cell value. Empty cells are stored as ''. A non-empty
        'Components' cell is converted into a list of ints.

    Raises:
        ValueError - when a 'Components' cell contains something that is not
                     a comma separated list of integers.
    """
    ws = wb[SHEET['products']]

    first_row = FIRST_ROW['products']
    header_row = first_row - 1

    # Map column letter -> header name for every column that should be read
    header = {}
    for cell in ws[header_row]:
        cell_val = cell.value
        # A cell without a value gives no usable field name, and empty cells
        # of a read-only workbook do not provide a coordinate either.
        if cell_val is None or cell_val in SKIP['products']:
            continue
        # Get cell column as string
        cell_column = cell.coordinate.replace(str(header_row), '')
        header[cell_column] = cell_val

    products = OrderedDict()
    # Unknown number of rows, so scan up to a fixed limit
    for row in range(first_row, MAX_NR_ROWS):
        new_product = {}
        for col, header_val in header.items():
            val = ws["{}{}".format(col, row)].value
            if val is None:
                val = ''
            if header_val == 'Components':
                # Some cells might be interpreted as floats
                # e.g. "37,78"
                # NOTE(review): a float has already lost trailing zeros, so
                # "37,70" presumably arrives as 37.7 and yields [37, 7].
                val = str(val).replace('.', ',')
                if val:
                    # Make a list with all individual components, ignoring
                    # blank entries caused by stray or trailing commas
                    val = [int(comp_id) for comp_id in val.split(',')
                           if comp_id.strip()]
            new_product[header_val] = val

        if not is_empty_row(new_product):
            # the row in the sheet is used as ID.
            # In the future this will have to be backpropagated to the sheet.
            product_row = row - first_row + 1
            products[product_row] = new_product

    return products
149+
150+
36151
def load_components(wb):
37152
ws = wb[SHEET['components']]
38153

@@ -44,7 +159,7 @@ def load_components(wb):
44159
for cell in header_cells:
45160
cell_val = cell.value
46161
if cell_val == 'ID':
47-
cell_val = 'REF_ID' # Don't want to confuse it with couchdb ids
162+
cell_val = 'REF_ID' # Don't want to confuse it with couchdb ids
48163
if cell_val not in SKIP['components']:
49164
# Get cell column as string
50165
cell_column = cell.coordinate.replace(str(header_row), '')
@@ -54,50 +169,111 @@ def load_components(wb):
54169
while row < MAX_NR_ROWS:
55170
new_component = {}
56171
for col, header_val in header.items():
57-
val = ws["{}{}".format(col,row)].value
172+
val = ws["{}{}".format(col, row)].value
58173
if val is None:
59174
val = ''
60175
new_component[header_val] = val
61176

177+
if new_component['REF_ID'] in components:
178+
# Violates the uniqueness of the ID
179+
raise ValueError("ID {} is included multiple "
180+
"times in the {} sheet. "
181+
"ABORTING.".format(new_component['REF_ID'], type))
182+
62183
if not is_empty_row(new_component):
63184
components[new_component['REF_ID']] = new_component
64185
row += 1
65186

66187
return components
67188

68189

69-
def main(input_sheet, config, user, user_email,
190+
def get_current_version(db):
    """Return the version number of the most recent document.

    Queries the "entire_document/by_version" view in descending order and
    only looks at its first row (presumably the highest version).

    Params:
        db - The database to query, exposing a couchdb style view() method.

    Returns:
        The 'Version' of that document as an int,
        or 0 when the view has no rows.
    """
    view_result = db.view('entire_document/by_version', limit=1,
                          descending=True)
    rows = view_result.rows
    if not rows:
        # Nothing has been stored yet
        return 0
    return int(rows[0].value['Version'])
197+
198+
199+
def _publish_pricing_doc(db, items, item_type, user, user_email, push):
    """Validate the items and store (or print) them as a new document version.

    Params:
        db - The database that holds the documents for this item type.
        items - A dict of the items read from the workbook, keyed by id.
        item_type - Either "components" or "products"
        user - Name stored as 'Issued by user'.
        user_email - Email stored as 'Issued by user email'.
        push - Save the document to db if True, otherwise only print it.

    Raises:
        ValueError - when the uniqueness or conservation checks fail.
    """
    check_unique(items, item_type)

    current_items = get_current_items(db, item_type)

    # Otherwise the first version
    if current_items:
        check_conserved(items, current_items, item_type)

    doc = {}
    doc[item_type] = items
    doc['Issued by user'] = user
    doc['Issued by user email'] = user_email
    doc['Issued at'] = datetime.datetime.now().isoformat()
    doc['Version'] = get_current_version(db) + 1

    if push:
        logger.info(
            'Pushing {} document version {}'.format(item_type, doc['Version'])
        )
        db.save(doc)
    else:
        # Dry run, only show what would have been pushed
        print(doc)


def main(input_file, config, user, user_email,
         add_components=False, add_products=False, push=False):
    """Read pricing data from an excel file and push it to the database.

    Params:
        input_file - Path to the excel file holding the pricing sheets.
        config - Path to a yaml settings file; its "couch_server" value is
                 used as the server url.
        user - Name of the user issuing the new version.
        user_email - Email of the user issuing the new version.
        add_components - Process the components sheet and the
                         'pricing_components' database if True.
        add_products - Process the products sheet and the
                       'pricing_products' database if True.
        push - Actually save the documents if True, otherwise each document
               is only printed.

    Raises:
        ValueError - when the data in the sheets fail validation.
    """
    with open(config) as settings_file:
        # safe_load is enough for a plain settings file, and yaml.load()
        # without an explicit Loader is rejected by recent PyYAML releases.
        server_settings = yaml.safe_load(settings_file)
    couch = Server(server_settings.get("couch_server", None))

    wb = load_workbook(input_file, read_only=True, data_only=True)

    if add_components:
        _publish_pricing_doc(couch['pricing_components'], load_components(wb),
                             'components', user, user_email, push)

    if add_products:
        _publish_pricing_doc(couch['pricing_products'], load_products(wb),
                             'products', user, user_email, push)
90263

91264

92265
if __name__ == '__main__':
93266
parser = argparse.ArgumentParser(description=__doc__)
94-
parser.add_argument('pricing_excel_sheet',
95-
help="The excel sheet currently used for pricing")
267+
parser.add_argument('pricing_excel_file',
268+
help="The excel file currently used for pricing")
96269
parser.add_argument('--statusdb_config', required=True,
97270
help='The genomics-status settings.yaml file.')
98271
parser.add_argument('--components', action='store_true',
99272
help='Add the pricing components '
100273
'from the "Price list" sheet.')
274+
parser.add_argument('--products', action='store_true',
275+
help='Add the pricing products '
276+
'from the sheet.')
101277
parser.add_argument('--push', action='store_true',
102278
help='Use this tag to actually push to the databse,'
103279
' otherwise it is just dryrun')
@@ -107,5 +283,6 @@ def main(input_sheet, config, user, user_email,
107283
help='Email used to tell who changed the document')
108284
args = parser.parse_args()
109285

110-
main(args.pricing_excel_sheet, args.statusdb_config, args.user,
111-
args.user_email, add_components=args.components, push=args.push)
286+
main(args.pricing_excel_file, args.statusdb_config, args.user,
287+
args.user_email, add_components=args.components,
288+
add_products=args.products, push=args.push)

0 commit comments

Comments
 (0)