-
Notifications
You must be signed in to change notification settings - Fork 5
/
mpeds_coder.py
2831 lines (2276 loc) · 91.4 KB
/
mpeds_coder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
"""
MPEDS Annotation Interface
~~~~~~
Alex Hanna
@alexhanna
alex.hanna@gmail.com
"""
## base
import json
import math
import os
import re
import string
import sys
import urllib
import datetime as dt
from random import choice
import yaml
from collections import OrderedDict
import pprint
if (sys.version_info < (3, 0)):
import urllib2
else:
import urllib.request
## pandas
import pandas as pd
import numpy as np
## lxml, time
from lxml import etree
from pytz import timezone
## flask
from flask import Flask, request, session, g, redirect, url_for, abort, make_response,\
render_template, flash, jsonify
from flask_login import LoginManager, login_user, logout_user, current_user, login_required
## jinja
import jinja2
## article assignment library
import assign_lib
## db
from sqlalchemy import func, desc, distinct, and_, or_, text
from sqlalchemy.sql import select
## app-specific
from database import db_session
from models import ArticleMetadata, ArticleQueue, CanonicalEvent, CanonicalEventLink, \
CoderArticleAnnotation, CodeFirstPass, CodeSecondPass, CodeEventCreator, \
Event, EventCreatorQueue, EventFlag, EventMetadata, \
RecentEvent, RecentCanonicalEvent, SecondPassQueue, User
##### Enable OrderedDict with PyYAML
##### Copy-pasta from https://stackoverflow.com/a/21912744 on 2019-12-12
def ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict):
    """Parse YAML from ``stream``, building every mapping with ``object_pairs_hook``.

    A private subclass of ``Loader`` is created on each call so that the custom
    mapping constructor is registered on that subclass only, not on ``Loader``
    itself. With the default hook, mappings come back as ``OrderedDict``
    instances whose keys follow the order of the (key, value) pairs produced
    by the loader.
    """
    class _OrderPreservingLoader(Loader):
        pass

    def _build_mapping(loader, node):
        loader.flatten_mapping(node)
        pairs = loader.construct_pairs(node)
        return object_pairs_hook(pairs)

    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    _OrderPreservingLoader.add_constructor(mapping_tag, _build_mapping)
    return yaml.load(stream, _OrderPreservingLoader)
# Build the Flask application and read its settings from config.py.
app = Flask(__name__)
app.config.from_pyfile('config.py')

# Optional extra template directory, searched before the default templates.
# Approach taken from
# https://stackoverflow.com/questions/13598363/how-to-dynamically-select-template-directory-to-be-used-in-flask
_additional_template_dir = app.config.get('ADDITIONAL_TEMPLATE_DIR')
if _additional_template_dir:
    template_loader = jinja2.ChoiceLoader([
        jinja2.FileSystemLoader([_additional_template_dir]),
        app.jinja_loader,
    ])
    app.jinja_loader = template_loader

## login manager; anonymous users are sent to the 'login' view
lm = LoginManager()
lm.login_view = 'login'
lm.init_app(app)

## timezone used when stamping coding times
central = timezone('US/Central')
## open-ended vars
v2 = [
    ('loc',    'Location'),
    ('time',   'Timing and Duration'),
    ('size',   'Size'),
    ('orgs',   'Organizations')
]

## informational vars
v3 = [
    ('actor',  'Protest actors'),
    ('police', 'Police/protester interactions'),
    ('counter','Counter protests'),
    ('viol',   'Violence')
]


def _load_yaml_file(path):
    """Read one YAML configuration file and return the parsed document.

    The file is closed as soon as it has been parsed. ``yaml.Loader`` is passed
    explicitly, as the adj-grid-order load in this file already does, because
    newer PyYAML releases no longer accept ``yaml.load`` without a loader.
    NOTE(review): ``yaml.Loader`` can construct arbitrary Python objects; this
    is only acceptable because these files are presumably deployment-owned
    configuration -- confirm they are never user-supplied.
    """
    with open(path, 'r') as handle:
        return yaml.load(handle, Loader = yaml.Loader)


## event-creator text-select variables: prefer the YAML file, fall back to CSV
event_creator_vars = []
_text_selects_yaml = app.config['WD'] + '/text-selects.yaml'
_text_selects_csv  = app.config['WD'] + '/text-selects.csv'

if os.path.isfile(_text_selects_yaml):
    ecs = _load_yaml_file(_text_selects_yaml)
    event_creator_vars = [(x, ecs[x]) for x in sorted(ecs.keys())]
elif os.path.isfile(_text_selects_csv):
    with open(_text_selects_csv, 'r') as _csv_file:
        _csv_lines = _csv_file.read().split('\n')

    for var in _csv_lines:
        var = var.strip()
        if var:
            ## "Some Label/Name" -> "some-label-name-text"
            key = '-'.join(re.split('[ /]', var.lower()))
            key += '-text'
            event_creator_vars.append( (key, var) )

## load adj grid order
adj_grid_order = []
_adj_grid_order_path = app.config['WD'] + '/adj-grid-order.yaml'
if os.path.isfile(_adj_grid_order_path):
    adj_grid_order = _load_yaml_file(_adj_grid_order_path)

## load preset variables
preset_vars = _load_yaml_file(app.config['WD'] + '/presets.yaml')
v1 = [(x, str.title(x).replace('-', ' ')) for x in sorted(preset_vars.keys())]

## multiple variable keys
multi_vars_keys = v1[:]
multi_vars_keys.extend(event_creator_vars[:])
multi_vars_keys = [x[0] for x in multi_vars_keys]

## pass one variables
vars = v1[:]
vars.extend(v2[:])
vars.extend(v3[:])

## single value variables for first-pass coding
sv = ['comments', 'protest', 'multi', 'nous', 'ignore']

## yaml for yes/no variables
yes_no_vars = _load_yaml_file(app.config['WD'] + '/yes-no.yaml')

## yaml for states/provinces/territories
if app.config['USE_STATES_AND_TERR']:
    with open(app.config['WD'] + '/states.yaml', 'r') as _states_file:
        state_and_territory_vals = ordered_load(_states_file)
else:
    state_and_territory_vals = dict()

## mark the single-valued items
## This extends the list object stored in app.config in place.
## NOTE(review): each yes/no group is appended as a nested list of keys, not
## flattened into the list -- confirm that is what consumers expect.
event_creator_single_value = app.config['SINGLE_VALUE_VARS']
event_creator_single_value.extend([[x[0] for x in v] for v in yes_no_vars.values()])

## metadata for Solr
meta_solr = ['PUBLICATION', 'SECTION', 'BYLINE', 'DATELINE', 'DATE', 'INTERNAL_ID']
#####
##### Helper functions
#####
##### load text from Solr database
def loadSolr(solr_id):
    """Fetch a single article from Solr by its id.

    Returns a 3-tuple ``(title, meta, paras)``:

    * on success, ``title`` is the document's TITLE field (or its first
      paragraph when there is no TITLE), ``meta`` holds the values of the
      ``meta_solr`` fields present in the document, and ``paras`` is the TEXT
      field split on ``<br/>``;
    * on failure, ``title`` is an integer code and both lists are empty:
      ``0`` document not found (or not unique), ``-1`` request to Solr
      failed, ``-2`` document has no TEXT field.
    """
    not_found  = (0, [], [])
    no_connect = (-1, [], [])
    is_py2     = sys.version_info < (3, 0)

    ## quote() moved to urllib.parse in Python 3
    if is_py2:
        quote = urllib.quote
    else:
        from urllib.parse import quote

    url = '%s/select?q=id:"%s"&wt=json' % (app.config['SOLR_ADDR'], quote(solr_id))

    try:
        if is_py2:
            res = urllib2.urlopen(urllib2.Request(url))
        else:
            res = urllib.request.urlopen(url)
    except Exception:
        ## any failure to reach Solr is reported as "cannot connect";
        ## KeyboardInterrupt/SystemExit are deliberately not swallowed
        return no_connect

    ## always release the HTTP connection, even if reading fails
    try:
        raw = res.read()
    finally:
        res.close()

    res = json.loads(raw.decode('utf-8'))
    if res['responseHeader']['status'] != 0:
        return not_found

    if len(res['response']['docs']) != 1:
        return not_found

    doc = res['response']['docs'][0]

    ## sometimes no text is available with AGW
    if 'TEXT' not in doc:
        return (-2, [], [])

    paras = doc['TEXT'].split('<br/>')

    meta = []
    for k in meta_solr:
        if k in doc:
            if k == 'DATE':
                ## keep only the part of the first DATE value before 'T'
                meta.append(doc[k][0].split('T')[0])
            else:
                meta.append(doc[k])

    if 'TITLE' in doc:
        title = doc['TITLE']
    else:
        ## no title field: promote the first paragraph to title
        title = paras[0]
        del paras[0]

    return title, meta, paras
## prep any article for display
def prepText(article):
    """Build the plain-text and HTML renderings of an article for display.

    The article content comes from the first matching source:

    1. the article row itself, when ``STORE_ARTICLES_INTERNALLY`` is true;
    2. Solr (via ``loadSolr``), when ``SOLR`` is true;
    3. a ``.txt`` file under ``DOC_ROOT``;
    4. an LDC NITF ``.xml`` file under ``DOC_ROOT``.

    Returns ``(text, html)``, both UTF-8 encoded. ``text`` is the paragraphs
    joined by newlines; ``html`` is a title heading, a metadata line and one
    ``<p id='N'>`` element per paragraph.
    """
    fn          = article.filename
    db_id       = article.db_id
    atitle      = article.title
    pub_date    = article.pub_date
    publication = article.publication
    fulltext    = article.text

    metawords = ['DATE', 'PUBLICATION', 'LANGUAGE', 'DATELINE', 'SECTION',
                 'EDITION', 'LENGTH', 'DATE', 'SEARCH_ID', 'Published', 'By', 'AP', 'UPI']

    title = ''
    meta  = []
    paras = []

    path     = app.config['DOC_ROOT'] + fn
    filename = str('INTERNAL_ID: %s' % fn.encode('utf8'))

    if app.config['STORE_ARTICLES_INTERNALLY'] == True:
        title = atitle
        meta  = [publication, pub_date, db_id]
        paras = fulltext.split('<br/>')
    elif app.config['SOLR'] == True:
        title, meta, paras = loadSolr(db_id)
        ## loadSolr signals failure with an integer in the title slot
        if title == 0:
            title = "Cannot find article in Solr."
        elif title == -1:
            title = "Cannot connect to Solr."
        elif title == -2:
            title = "No text. Skip article."
    elif re.match(r"^.+txt$", fn):
        i     = 0
        title = ''
        pLine = ''

        ## the context manager closes the file even if parsing fails
        with open(path, 'r') as f:
            for line in f:
                line  = line.strip()
                words = line.split()

                ## remove colon from first word in the line
                if len(words) > 0:
                    words[0] = words[0].replace(":", '')

                if line == '':
                    pass
                elif i == 0:
                    ## first line is title
                    if words[0] == 'TITLE':
                        line = " ".join(words[1:])
                    title = line
                elif pLine != '' and words[0] in metawords:
                    ## metadata lines only count directly after a non-blank line
                    meta.append(line)
                else:
                    ## add to html
                    paras.append(line)

                i += 1
                pLine = line

        ## append filename info
        meta.append(filename)
    elif re.match(r"^.+xml$", fn):
        ## this format only works for LDC XML files
        with open(path, "r") as xml_file:
            tree = etree.parse(xml_file)

        headline = tree.xpath("/nitf/body[1]/body.head/hedline/hl1")
        paras    = tree.xpath("/nitf/body/body.content/block[@class='full_text']/p")
        byline   = tree.xpath("/nitf/body/body.head/byline[@class='print_byline']")
        dateline = tree.xpath("/nitf/body/body.head/dateline")

        if len(byline):
            meta.append(byline[0].text)

        if len(dateline):
            meta.append(dateline[0].text)

        meta.append(filename)
        title = headline[0].text
        paras = [x.text for x in paras]

    ## get rid of lead if it says the same thing:
    ## drop the first paragraph when, minus its "LEAD: " marker, it repeats the second
    if len(paras) > 1:
        p0 = paras[0].replace("LEAD: ", "")
        if p0 == paras[1]:
            del paras[0]

    ## remove HTML from every paragraph
    paras = [re.sub(r'<[^>]*>', '', x) for x in paras]

    ## paste together paragraphs, give them an ID
    all_paras = "\n".join("<p id='%d'>%s</p>" % (idx, para) for idx, para in enumerate(paras))
    all_paras = all_paras.strip()

    html  = "<h4>%s</h4>\n" % title
    html += "<p class='meta' id='meta'>%s</p>\n" % " | ".join(map(lambda x: "%s" % x, meta)).strip()
    html += "<div class='bodytext' id='bodytext'>\n%s\n</div>" % all_paras

    ## plain-text
    text = "\n".join(paras)
    text = text.encode("utf-8")
    html = html.encode("utf-8")

    return text, html
def validate( x ):
    """Normalise a value for flat-file output.

    * Falsy values (``None``, ``""``, ``0``, ...) become the string ``"0"``.
    * Unicode text has newlines, carriage returns and tabs replaced by a
      single space each and is returned UTF-8 encoded.
    * Anything else is returned as ``str(x)``.
    """
    if not x:
        return "0"

    ## `unicode` on Python 2, `str` on Python 3
    text_type = type(u'')
    if isinstance(x, text_type):
        for whitespace in ("\n", "\r", "\t"):
            x = x.replace(whitespace, " ")
        return x.encode('utf-8')

    return str(x)
def convertIDToPublication(db_id, db_name):
    """Derive a publication label from an article id.

    The id is split on underscores. Ids containing ``AGW`` use the second
    field, ids containing ``NYT`` are labelled ``NYT``, and everything else
    uses the first field. Hyphens in the label become spaces, and ``" (87-95)"``
    is appended when ``db_name`` contains ``LN``.
    """
    fields = db_id.split("_")

    if 'AGW' in db_id:
        publication = fields[1]
    elif 'NYT' in db_id:
        publication = 'NYT'
    else:
        publication = fields[0]

    ## replace - with space
    publication = publication.replace('-', ' ')

    ## add (87-95) for LN databases
    suffix = " (87-95)" if 'LN' in db_name else ""
    return publication + suffix
## truncating text for summary
@app.template_filter('summarizeText')
def summarizeText(s):
    """Jinja filter: shorten ``s`` to its first 8 and last 5 characters.

    Values of 15 characters or fewer are returned unchanged; longer ones
    become ``<first 8>...<last 5>``.
    """
    if len(s) <= 15:
        return s

    head, tail = s[0:8], s[-5:]
    return head + "..." + tail
@app.template_filter('datetime')
def format_datetime(value):
    """Jinja filter: render a datetime as ``YYYY-MM-DD HH:MM:SS``.

    Falsy values (e.g. ``None``) render as the empty string.
    """
    if not value:
        return ''

    return dt.datetime.strftime(value, "%Y-%m-%d %H:%M:%S")
@app.template_filter('nonestr')
def nonestr(s):
    """Jinja filter: turn ``None`` into an empty string, pass anything else through."""
    return '' if s is None else s
## java string hashcode
## copy-pasta from http://garage.pimentech.net/libcommonPython_src_python_libcommon_javastringhashcode/
## make it a Jinja2 filter for template ease
@app.template_filter('hashcode')
def hashcode(s):
    """Jinja filter: absolute value of the Java-style 32-bit string hash of ``s``.

    The hash is accumulated as ``31 * h + ord(c)`` truncated to 32 bits, then
    reinterpreted as a signed 32-bit value before taking the absolute value.
    """
    mask = 0xFFFFFFFF
    sign_bit = 0x80000000

    acc = 0
    for ch in s:
        acc = (acc * 31 + ord(ch)) & mask

    ## fold the unsigned accumulator into the signed 32-bit range
    signed = ((acc + sign_bit) & mask) - sign_bit
    return int(math.fabs(signed))
#####
##### App setup
#####
@app.teardown_appcontext
def shutdown_session(exception=None):
    """Call ``db_session.remove()`` whenever the application context is torn down.

    ``exception`` is accepted because teardown handlers are passed one; it is
    not used here.
    """
    db_session.remove()
### auth stuff
@app.route('/login', methods=['GET', 'POST'])
def login():
    """Show the login form (GET) or try to log the user in (POST).

    On a successful POST the user is logged in and redirected to the index
    page; otherwise an error is flashed and the login page is shown again.
    """
    if request.method == 'GET':
        return render_template("login.html")

    credentials = {
        'username': request.form['username'],
        'password': request.form['password'],
    }

    ## NOTE(review): the submitted password is matched directly against the
    ## stored column, which suggests passwords are kept unhashed -- confirm.
    reg_user = User.query.filter_by(**credentials).first()
    if reg_user is not None:
        login_user(reg_user)
        return redirect(url_for('index'))

    flash("Username or password is invalid. Please try again.", "error")
    return redirect(url_for('login'))
@app.route('/logout')
def logout():
    """Log the current user out and send them back to the login page."""
    logout_user()
    login_url = url_for('login')
    return redirect(login_url)
@lm.user_loader
def load_user(id):
    """Flask-Login user loader: return the ``User`` for a stored session id.

    Returns ``None`` when ``id`` is not an integer, so that a stale or
    tampered session is treated as "not logged in" rather than raising.
    """
    try:
        user_id = int(id)
    except (TypeError, ValueError):
        return None

    return User.query.get(user_id)
## views
@app.route('/')
@app.route('/index')
@login_required
def index():
    """Render the landing page for a logged-in user."""
    landing_page = render_template("index.html")
    return landing_page
#####
##### Coding pages
#####
@app.route('/code1')
@login_required
def code1Next():
    """Redirect the coder to the next uncoded article in their first-pass queue.

    Queue entries whose article no longer exists are stamped as coded and
    skipped. Renders ``null.html`` when the queue is empty.
    """
    now = dt.datetime.now(tz = central).replace(tzinfo = None)

    while True:
        ## next uncoded entry in this user's queue
        queue_entry = db_session.query(ArticleQueue).\
            filter_by(coder_id = current_user.id, coded_dt = None).first()

        ## out of articles, return null page
        if queue_entry is None:
            return render_template("null.html")

        article = db_session.query(ArticleMetadata).\
            filter_by(id = queue_entry.article_id).first()
        if article is not None:
            return redirect(url_for('code1', aid = queue_entry.article_id))

        ## orphaned queue entry: mark it coded so it is not offered again
        queue_entry.coded_dt = now
        db_session.add(queue_entry)
        db_session.commit()
@app.route('/code1/<aid>')
@login_required
def code1(aid):
    """Render the first-pass coding page for article ``aid``.

    Responds with 404 when no article has that id.
    """
    article = db_session.query(ArticleMetadata).filter_by(id = aid).first()
    if article is None:
        ## prepText cannot handle a missing article
        abort(404)

    _, html = prepText(article)

    return render_template("code1.html", vars = vars, aid = aid, text = html.decode('utf-8'))
@app.route('/code2')
@login_required
def code2Next():
    """Redirect to the next uncoded article in the user's second-pass queue.

    Users with ``authlevel`` below 2 are sent to the index page; an empty
    queue renders ``null.html``.
    """
    if current_user.authlevel < 2:
        return redirect(url_for('index'))

    pending = db_session.query(SecondPassQueue).\
        filter_by(coder_id = current_user.id, coded_dt = None).first()
    if not pending:
        return render_template("null.html")

    return redirect(url_for('code2', aid = pending.article_id))
@app.route('/code2/<aid>')
@login_required
def code2(aid):
    """Render the second-pass coding page for article ``aid``.

    Summarises the first-pass codes for the article: per-value counts for the
    dichotomous variables in ``cfp_order``, occurrence counts for every other
    first-pass variable, collected comments, and which coders answered the
    ``protest`` question. Users with ``authlevel`` below 2 are redirected to
    the index page; an unknown article id yields a 404.
    """
    if current_user.authlevel < 2:
        return redirect(url_for('index'))

    aid = int(aid)

    ## bail out early: prepText cannot handle a missing article
    article = db_session.query(ArticleMetadata).filter_by(id = aid).first()
    if article is None:
        abort(404)

    cfp_order = ['protest', 'multi', 'nous']
    cfp_dict  = {cfp_name: {} for cfp_name in cfp_order}
    cfp_ex    = ['load', 'ignore']
    sv_order  = ['yes', 'no', 'maybe', 'ignore']
    comments  = []
    opts      = {}
    curr      = {}

    ## initialize the dictionary
    for v in vars:
        cfp_dict[v[0]] = 0

    ## gather coders which have coded this article
    ## and get single-valued items
    cfps = db_session.query(CodeFirstPass).filter(CodeFirstPass.article_id == aid).all()
    coders_protest = [(x[1].username, x[0].value) for x in db_session.query(CodeFirstPass, User).join(User).\
        filter(CodeFirstPass.article_id == aid, CodeFirstPass.variable == 'protest').all()]
    yes_coders = db_session.query(CodeFirstPass).\
        filter(CodeFirstPass.article_id == aid,
               CodeFirstPass.variable == 'protest',
               CodeFirstPass.value.in_(['yes', 'maybe'])).count()

    ## load the single-value variables
    for cfp in cfps:
        if cfp.variable in cfp_ex:
            continue
        elif cfp.variable == 'comments':
            comments.append(cfp.value)
        elif cfp.variable == 'ignore':
            ## assign ignore to protest
            ## NOTE(review): unreachable while 'ignore' is listed in cfp_ex
            ## above -- confirm which of the two behaviours is intended.
            cfp_dict['protest']['ignore'] = cfp_dict['protest'].get('ignore', 0) + 1
        elif cfp.variable in cfp_order:
            ## if in the dichotomous variables, sum values
            tally = cfp_dict[cfp.variable]
            tally[cfp.value] = tally.get(cfp.value, 0) + 1
        else:
            ## else, just mark existence
            cfp_dict[cfp.variable] += 1

    _, html = prepText(article)

    return render_template(
        "code2.html",
        vars       = vars,
        aid        = aid,
        cfp_dict   = cfp_dict,
        cfp_order  = cfp_order,
        sv_order   = sv_order,
        comments   = comments,
        opts       = opts,
        curr       = curr,
        coders_p   = coders_protest,
        num_coders = len(coders_protest),
        yes_coders = float(yes_coders),
        text       = html.decode('utf-8'))
@app.route('/event_creator')
@login_required
def ecNext():
    """Redirect to the next uncoded article in the user's event-creator queue.

    Renders ``null.html`` when nothing is left to code.
    """
    pending = db_session.query(EventCreatorQueue).\
        filter_by(coder_id = current_user.id, coded_dt = None).first()
    if not pending:
        return render_template("null.html")

    return redirect(url_for('eventCreator', aid = pending.article_id))
@app.route('/event_creator/<aid>')
@login_required
def eventCreator(aid):
    """Render the event-creator page for article ``aid``.

    Responds with 404 when no article has that id.
    """
    aid     = int(aid)
    article = db_session.query(ArticleMetadata).filter_by(id = aid).first()
    if article is None:
        ## prepText cannot handle a missing article
        abort(404)

    _, html = prepText(article)

    return render_template("event-creator.html", aid = aid, text = html.decode('utf-8'))
#####
##### Adjudication
#####
@app.route('/adj', methods = ['GET'])
@login_required
def adj():
    """Render the adjudication page in its initial, empty state.

    Only the five most recently accessed candidate events and canonical
    events are pre-loaded. Users with ``authlevel`` below 2 are redirected to
    the index page.
    """
    if current_user.authlevel < 2:
        return redirect(url_for('index'))

    ## Most recent candidate events (keep only the EventMetadata half of each row).
    recent_rows = db_session.query(EventMetadata, RecentEvent).\
        join(EventMetadata, EventMetadata.event_id == RecentEvent.event_id).\
        order_by(desc(RecentEvent.last_accessed)).limit(5).all()
    recent_events = [row[0] for row in recent_rows]

    ## Most recent canonical events.
    ## TODO: Add in user by name.
    recent_canonical_events = db_session.query(CanonicalEvent).\
        join(RecentCanonicalEvent, CanonicalEvent.id == RecentCanonicalEvent.canonical_id).\
        order_by(desc(RecentCanonicalEvent.last_accessed)).limit(5).all()

    ## TODO: Base this off EventMetadata for now. Eventually, we want to get rid of this.
    ## Filterable fields: every EventMetadata column except 'id', plus 'flag'.
    filter_fields = EventMetadata.__table__.columns.keys()
    filter_fields.remove('id')
    filter_fields.append('flag')

    page_context = dict(
        search_events           = [],
        filter_fields           = filter_fields,
        cand_events             = {},
        grid_vars               = adj_grid_order,
        links                   = [],
        flags                   = [],
        recent_events           = recent_events,
        recent_canonical_events = recent_canonical_events,
        canonical_event         = None)

    return render_template("adj.html", **page_context)
@app.route('/load_adj_grid', methods = ['GET'])
@login_required
def load_adj_grid():
    """Render the expanded-event grid for the requested events.

    Query parameters: ``cand_events`` (comma-separated candidate event ids)
    and ``canonical_event_key``. The literal string ``null`` is treated as
    "not supplied" for both.
    """
    def _arg_or_none(name):
        ## the client sends the string 'null' for missing values
        raw = request.args.get(name)
        return None if raw == 'null' else raw

    ce_ids              = _arg_or_none('cand_events')
    canonical_event_key = _arg_or_none('canonical_event_key')

    cand_event_ids = []
    if ce_ids:
        cand_event_ids = [int(piece) for piece in ce_ids.split(',')]

    cand_events     = _load_candidate_events(cand_event_ids)
    canonical_event = _load_canonical_event(key = canonical_event_key)
    links           = _load_links(canonical_event_key)
    event_flags     = _load_event_flags(cand_event_ids)

    return render_template('adj-grid.html',
        canonical_event = canonical_event,
        cand_events     = cand_events,
        links           = links,
        flags           = event_flags,
        grid_vars       = adj_grid_order)
#####
## Search functions
#####
@app.route('/do_search', methods = ['POST'])
@login_required
def do_search():
    """Search candidate events using the posted search string, filters and sorts.

    Form fields read: ``adj_search_input`` and, for ``i`` in 0..3,
    ``adj_filter_field_i`` / ``adj_filter_value_i`` / ``adj_filter_compare_i``
    and ``adj_sort_field_i`` / ``adj_sort_order_i``.

    Returns the rendered ``adj-search-block.html`` with two extra headers:
    ``Search-Results`` (number of hits) and ``Query`` (the submitted form as
    JSON). Responds with 400 when neither a search term nor a complete filter
    is supplied, when a comparison or sort order is not recognised, or when
    more than 1000 events match.
    """
    search_str = request.form['adj_search_input']

    ## Translate each comparison keyword into a SQLAlchemy expression builder.
    comparators = {
        'eq':         lambda col, val: col == val,
        'ne':         lambda col, val: col != val,
        'lt':         lambda col, val: col < val,
        'le':         lambda col, val: col <= val,
        'gt':         lambda col, val: col > val,
        'ge':         lambda col, val: col >= val,
        'contains':   lambda col, val: col.like(u'%{}%'.format(val)),
        'startswith': lambda col, val: col.like(u'{}%'.format(val)),
        'endswith':   lambda col, val: col.like(u'%{}'.format(val)),
    }

    ## get multiple filters and sorting
    filters = []
    sorts   = []

    ## cycle through all the filter and sort fields
    for i in range(4):
        filter_field   = request.form['adj_filter_field_{}'.format(i)]
        filter_value   = request.form['adj_filter_value_{}'.format(i)]
        filter_compare = request.form['adj_filter_compare_{}'.format(i)]

        if filter_field and filter_value and filter_compare:
            if filter_compare not in comparators:
                return make_response('Invalid filter compare: {}'.format(filter_compare), 400)

            ## flags live on their own table; everything else is EventMetadata
            _model = EventMetadata if filter_field != 'flag' else EventFlag
            filters.append(comparators[filter_compare](getattr(_model, filter_field), filter_value))

        sort_field = request.form['adj_sort_field_{}'.format(i)]
        sort_order = request.form['adj_sort_order_{}'.format(i)]

        ## Sort by the specified field.
        if sort_field and sort_order:
            ## sort_order is looked up as a column method, so restrict it
            ## to the two ordering methods rather than trusting the form
            if sort_order not in ('asc', 'desc'):
                return make_response('Invalid sort order: {}'.format(sort_order), 400)

            _model = EventMetadata if sort_field != 'flag' else EventFlag
            sorts.append(getattr(getattr(_model, sort_field), sort_order)())

    ## AND all the filters together; stay None when there are none so the
    ## "nothing to search for" case below can be detected
    filter_expr = and_(*filters) if filters else None

    search_expr = None
    if search_str:
        ## Get all fields that are searchable.
        search_fields = EventMetadata.__table__.columns.keys()
        search_fields.remove('id')

        ## Build the search expression. For now, it can only do an AND or OR search.
        operator = and_
        if ' AND ' in search_str:
            search_terms = search_str.split(' AND ')
            operator = and_
        elif ' OR ' in search_str:
            search_terms = search_str.split(' OR ')
            operator = or_
        else:
            search_terms = [search_str]

        ## Each term may match in any searchable field.
        term_exprs = []
        for term in search_terms:
            pattern = u'%{}%'.format(term)
            term_exprs.append(or_(*[getattr(EventMetadata, field).like(pattern)
                                    for field in search_fields]))
        search_expr = operator(*term_exprs)

    if filter_expr is None and search_expr is None:
        return make_response("Please enter a search term or a filter.", 400)

    ## Filter out null start dates to account for disqualifying information.
    ## (`!= None` is intentional: SQLAlchemy turns it into IS NOT NULL.)
    date_filter = EventMetadata.start_date != None

    ## Combine filters.
    criteria = [expr for expr in (filter_expr, search_expr) if expr is not None]
    criteria.append(date_filter)
    a_filter_expr = and_(*criteria)

    ## Perform the search on a left join to get all the candidate events.
    query = db_session.query(EventMetadata).\
        join(EventFlag, EventMetadata.event_id == EventFlag.event_id, isouter = True).\
        filter(a_filter_expr)

    ## Each sort is its own ORDER BY term, applied in the order submitted.
    if sorts:
        query = query.order_by(*sorts)

    search_events = query.all()

    if len(search_events) > 1000:
        return make_response("Too many results. Please refine your search.", 400)

    ## get all flags for these events
    flags = _load_event_flags([x.event_id for x in search_events])

    response = make_response(
        render_template('adj-search-block.html',
            events = search_events,
            flags = flags)
    )

    url_params = {k: v for k, v in request.form.items()}

    ## make and return results. add in the number of results to update the button.
    response.headers['Search-Results'] = len(search_events)
    response.headers['Query'] = json.dumps(url_params)
    return response
@app.route('/search_canonical', methods = ['POST'])
@login_required
def search_canonical():
    """Find canonical events whose key, description or notes contain the search term.

    Responds with 400 when ``canonical_search_term`` is empty; otherwise
    renders ``adj-canonical-search-block.html`` with the matches.
    """
    canonical_search_term = request.form['canonical_search_term']

    if canonical_search_term == '':
        return make_response("Please enter a search term.", 400)

    ## One substring pattern, matched against every searchable column.
    pattern = u'%{}%'.format(canonical_search_term)
    searchable = (
        CanonicalEvent.key,
        CanonicalEvent.description,
        CanonicalEvent.notes,
    )
    filter_expr = or_(*[column.like(pattern) for column in searchable])

    rs = db_session.query(CanonicalEvent).filter(filter_expr).all()

    return render_template('adj-canonical-search-block.html', events = rs)
@app.route('/adj_search/<function>', methods = ['POST'])
@login_required
def adj_search(function):
    """Render one more search/filter/sort form row for the search pane.

    ``function`` must be ``search``, ``filter`` or ``sort``; anything else is
    a 400. The row template ``adj-<function>.html`` is rendered with
    ``is_addition`` taken from the form.
    """
    if function == 'search':
        if request.form['adj-search-input'] in (None, ''):
            return make_response('No search string provided', 400)
    elif function == 'filter':
        ## NOTE(review): request.form[...] presumably never yields None, so
        ## this check (and the one for 'sort') looks like it cannot fire for
        ## empty inputs -- confirm whether empty strings should be rejected.
        submitted = [request.form[name] for name in
                     ('adj-filter-field', 'adj-filter-compare', 'adj-filter-value')]
        if any(value is None for value in submitted):
            return make_response('No filter provided', 400)
    elif function == 'sort':
        submitted = [request.form[name] for name in
                     ('adj-sort-field', 'adj-sort-order')]
        if any(value is None for value in submitted):
            return make_response('No sort field provided', 400)
    else:
        return make_response("Invalid search function", 400)

    is_addition = request.form['is_addition'] == 'true'

    return render_template('adj-{}.html'.format(function), is_addition = is_addition)
#####
## Grid functions
#####
@app.route('/add_canonical_link', methods = ['POST'])
@login_required
def add_canonical_link():
    """Link an article to a canonical event without attaching any data.

    A 'link' record is obtained for the article through
    ``_check_or_add_dummy_value`` and tied to the canonical event. Responds
    with 400 if the article is already linked to that event.
    """
    canonical_event_id = int(request.form['canonical_event_id'])
    article_id         = int(request.form['article_id'])

    ## look for an existing 'link' record for this article/event pair
    existing = db_session.query(CodeEventCreator, CanonicalEventLink)\
        .join(CanonicalEventLink, CodeEventCreator.id == CanonicalEventLink.cec_id)\
        .filter(
            CodeEventCreator.variable == 'link',
            CodeEventCreator.article_id == article_id,
            CanonicalEventLink.canonical_id == canonical_event_id
        ).first()

    ## both halves of the joined row present means the link is already there
    already_linked = existing and existing[0] and existing[1]
    if already_linked:
        return make_response("Link already exists.", 400)

    ## for the link, create a new CEC and link it back to the canonical event
    ## we'll treat this as part of the dummy event
    cec = _check_or_add_dummy_value(article_id, 'link', 'yes')
    db_session.refresh(cec)

    ## add the link
    new_link = CanonicalEventLink(current_user.id, canonical_event_id, cec.id)
    db_session.add(new_link)
    db_session.commit()

    return make_response("Link added.", 200)
@app.route('/add_canonical_record', methods = ['POST'])
@login_required
def add_canonical_record():
    """Attach one candidate-event datum (a CEC row) to a canonical event.

    Responds with 404 when the CEC row does not exist or is already attached
    to that canonical event; otherwise renders ``canonical-cell.html`` for the
    newly created link.
    """
    canonical_event_id = int(request.form['canonical_event_id'])
    cec_id             = int(request.form['cec_id'])

    ## the datum must exist
    record = db_session.query(CodeEventCreator)\
        .filter(CodeEventCreator.id == cec_id).first()
    if not record:
        return make_response("No such CEC record.", 404)

    ## and must not be attached to this canonical event already
    existing_links = db_session.query(CanonicalEventLink)\
        .filter(
            CanonicalEventLink.cec_id == cec_id,
            CanonicalEventLink.canonical_id == canonical_event_id)\
        .all()
    if existing_links:
        return make_response("Record already exists.", 404)

    db_session.add(CanonicalEventLink(current_user.id, canonical_event_id, cec_id))
    db_session.commit()

    ## read the link back to get its id and timestamp
    cel = db_session.query(CanonicalEventLink)\
        .filter(
            CanonicalEventLink.coder_id == current_user.id,
            CanonicalEventLink.canonical_id == canonical_event_id,
            CanonicalEventLink.cec_id == cec_id
        ).first()

    ## free text, when present, wins over the coded value
    value = record.text if record.text is not None else record.value

    return render_template('canonical-cell.html',
        var       = record.variable,
        value     = value,
        timestamp = cel.timestamp,
        cel_id    = cel.id)
@app.route('/add_event_flag', methods = ['POST'])
@login_required
def add_event_flag():
    """Store a flag on a candidate event on behalf of the current user."""
    event_id = int(request.form['event_id'])
    flag     = request.form['flag']

    new_flag = EventFlag(current_user.id, event_id, flag)
    db_session.add(new_flag)
    db_session.commit()

    return make_response("Flag created.", 200)
@app.route('/del_canonical_event', methods = ['POST'])
@login_required
def del_canonical_event():
    """Delete a canonical event together with its links and recent-event rows.

    Responds with 404 when no canonical event has the posted ``id``.
    """
    canonical_id = int(request.form['id'])

    ce = db_session.query(CanonicalEvent)\
        .filter(CanonicalEvent.id == canonical_id).first()
    if ce is None:
        ## nothing to delete; db_session.delete(None) would raise
        return make_response("No such canonical event.", 404)

    cels = db_session.query(CanonicalEventLink)\
        .filter(CanonicalEventLink.canonical_id == canonical_id).all()
    rces = db_session.query(RecentCanonicalEvent)\
        .filter(RecentCanonicalEvent.canonical_id == canonical_id).all()

    ## remove these first to avoid FK error
    for dependent in cels + rces:
        db_session.delete(dependent)
    db_session.commit()

    ## delete the actual event
    db_session.delete(ce)
    db_session.commit()

    return make_response("Canonical event deleted.", 200)
@app.route('/del_canonical_link', methods = ['POST'])
@login_required
def del_canonical_link():
    """Remove an article's 'link' records and their canonical-event links.

    For every 'link' CEC row of the posted ``article_id``, the first matching
    canonical-event link is deleted, then the CEC rows themselves.
    """
    article_id = int(request.form['article_id'])

    ## every 'link' record that belongs to this article
    link_records = db_session.query(CodeEventCreator)\
        .filter(
            CodeEventCreator.article_id == article_id,
            CodeEventCreator.variable == 'link'
        ).all()

    ## first pass: drop the canonical-event link hanging off each record
    for link_record in link_records:
        cel = db_session.query(CanonicalEventLink)\
            .filter(CanonicalEventLink.cec_id == link_record.id).first()
        if cel:
            db_session.delete(cel)

    ## commit these deletes first to avoid foreign key error
    db_session.commit()

    ## second pass: drop the records themselves
    for link_record in link_records:
        db_session.delete(link_record)
    db_session.commit()

    return make_response("Link removed.", 200)
@app.route('/del_canonical_record', methods = ['POST'])
@login_required
def del_canonical_record():
""" Removes the link between a candidate event piece of data and a canonical event. """
cel_id = int(request.form['cel_id'])
## grab it from the database
cel = db_session.query(CanonicalEventLink)\
.filter(CanonicalEventLink.id == cel_id).first()
## if it's fake, toss it
if not cel:
return make_response("No such CEL record.", 404)