-
Notifications
You must be signed in to change notification settings - Fork 0
/
querysets.py
288 lines (267 loc) · 6.6 KB
/
querysets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
import pandas as pd
from pytrends.request import TrendReq
# Default number of keywords kept per category by
# from_trends_top_query_by_category.
NUM_KEYWORDS = 3
# CSV file that from_csv reads its keywords from (first column only).
KEYWORD_CSV = 'top_news_queries_20171029.csv'
from constants import POPULAR_CATEGORIES
def from_csv(csv_path=None):
    """Load keywords from a CSV file and wrap each one in a keyword object.

    Args:
        csv_path: Path of the CSV file to read. When omitted (or ``None``)
            the module-level ``KEYWORD_CSV`` is used, which keeps the
            original zero-argument call working unchanged.

    Returns:
        A list of dicts, one per CSV row, each with the keys ``keyword``
        (the value from the first CSV column) and ``category`` (always
        ``'manual_news'``).
    """
    if csv_path is None:
        # Resolve the default at call time rather than at definition time.
        csv_path = KEYWORD_CSV
    datf = pd.read_csv(csv_path)
    # Only the first column holds keywords; any other columns are ignored.
    keywords = datf.iloc[:, 0].tolist()
    return [
        {'keyword': keyword, 'category': 'manual_news'}
        for keyword in keywords
    ]
"""
select DISTINCT query from serp where category != 'procon_popular' and category != 'trending';
"Pizza Hut"
"Domino's Pizza"
"McDonald's"
"Amazon"
"Walmart"
"Target Corporation"
"Chevrolet"
"Honda"
"Toyota"
"Wells Fargo"
"Chase Bank"
"Bank of America"
"Donald Trump"
"Hillary Clinton"
"Barack Obama"
"United States Department of Veterans Affairs"
"US Senate"
"US Supreme Court"
"""
def from_trends_top_query_by_category(n=NUM_KEYWORDS, yearmonth='2016'):
    """Build keyword objects from the Google Trends top charts.

    For every category id in ``POPULAR_CATEGORIES`` the top chart is
    requested (geo ``'US'``) and the first ``n`` titles are kept.

    Args:
        n: Maximum number of keywords to keep per category.
        yearmonth: Date string passed as the first argument to
            ``top_charts``. Defaults to ``'2016'``, the value the function
            previously hard-coded.

    Returns:
        A flat list of dicts with the keys ``keyword`` (a chart title) and
        ``category`` (the category id the title was fetched for), in
        ``POPULAR_CATEGORIES`` order.
    """
    # A single client serves every category; it does not depend on the loop
    # variable, so it is created once instead of once per iteration.
    pytrends = TrendReq(hl='en-US', tz=360)
    keyword_objs = []
    for cid in POPULAR_CATEGORIES:
        top_chart = pytrends.top_charts(yearmonth, cid=cid, geo='US')
        # The result is expected to expose a `title` column
        # (presumably a pandas DataFrame -- confirm against pytrends).
        titles = top_chart.title.tolist()[:n]
        keyword_objs += [
            {'keyword': title, 'category': cid} for title in titles
        ]
    return keyword_objs
# Lower-case query strings exposed under the 'procon_popular' key of CURATED.
# NOTE(review): presumably the most popular ProCon.org topics -- confirm source.
PROCON_POPULAR = [
    'medical marijuana',
    'gun control',
    'animal testing',
    'death penalty',
    'school uniforms',
    'drinking age',
    'minimum wage',
    'euthanasia',
    'illegal immigration',
    'abortion',
]
# The top trending searches from Nov. 28 to Dec. 7 (the set used in the paper).
# Exposed under the 'trending' key of CURATED.
TRENDING = [
    'bitcoin price',
    'al franken',
    'california fires',
    'ryan shazier',
    'eagles',
    'college football playoff',
    'michael flynn',
    'gomer pyle',
    'matt lauer',
    'gertrude jekyll',
]
# Alphabetically ordered topic titles, exposed under the 'procon_a_to_z' key
# of CURATED. Titles are kept verbatim, including punctuation.
# NOTE(review): presumably the A-to-Z issue list from ProCon.org -- confirm source.
PROCON_A_TO_Z = [
    'Abortion',
    'ACLU - Good or Bad?',
    'Alternative Energy vs. Fossil Fuels',
    'Animal Testing',
    'Big Three Auto Bailout',
    'Born Gay? Origins of Sexual Orientation',
    'Cell Phones - Are They Safe?',
    'Churches and Taxes',
    'Climate Change',
    'College Education Worth It?',
    'College Football Playoffs',
    'Concealed Handguns',
    'Corporate Tax Rate & Jobs',
    'Cuba Embargo',
    'D.A.R.E. - Good or Bad?',
    'Death Penalty',
    'Drinking Age - Lower It?',
    'Drone Strikes Overseas',
    'Euthanasia & Assisted Suicide',
    'Felon Voting',
    'Gay Marriage',
    'Gold Standard',
    'Golf - Is It a Sport?',
    'Gun Control',
    'Illegal Immigration',
    'Insider Trading by Congress',
    'Israeli-Palestinian Conflict',
    'Local Elections - Santa Monica, 2014',
    'Medical Marijuana',
    'Milk - Is It Healthy?',
    'Minimum Wage',
    'Obamacare - Good or Bad?',
    'Obesity a Disease?',
    'Prescription Drug Ads Good?',
    'President Bill Clinton',
    'President Ronald Reagan',
    'Presidential Election, 2008',
    'Presidential Election, 2012',
    'Presidential Election, 2016',
    'Prostitution - Legalize?',
    'Right to Health Care?',
    'School Uniforms',
    'Social Networking - Good or Bad?',
    'Social Security Privatization',
    'Sports and Drugs',
    'Standardized Tests',
    'Tablets vs. Textbooks',
    'Teacher Tenure',
    'Under God in the Pledge',
    'US-Iraq War',
    'Vaccines for Kids',
    'Vegetarianism',
    'Video Games and Violence',
    'Voting Machines',
    'WTC Muslim Center',
]
# The top trending searches from Dec 16 and 17.
# NOTE(review): not referenced by CURATED in the code visible here -- confirm
# whether it is used elsewhere.
ALL_TRENDS_DEC1617 = [
    'December global festivities',
    'Atlanta airport',
    'A Christmas Story live',
    'Philadelphia Eagles',
    'Cowboys vs Raiders',
    'Christopher Plummer',
    'Aaron Rodgers',
    'Clash of champions 2017',
    'Marvin Lewis',
    'New Orleans Saints',
    'Saints',
    'White Christmas',
    'Miami Dolphins',
    'The sound of music',
    'Chris Matthews',
    'Gujarat Elections',
    'NFL Playoff Predictions',
    'Scarlett Johansson',
    'Baltimore Ravens',
    'Danny Kaye',
]
# The top 10 queries from https://trends.google.com/trends/explore?geo=US&q=insurance
# on 1/3/2018
# Region: United States
# Time range: past year
# Exposed under the 'top_insurance' key of CURATED.
TOP_INSURANCE = [
    'health insurance',
    'car insurance',
    'auto insurance',
    'life insurance',
    'progressive',
    'progressive insurance',
    'home insurance',
    'insurance companies',
    'geico insurance',
    'geico',
]
# The top 10 queries from https://trends.google.com/trends/explore?geo=US&q=loans
# on 1/3/2018
# Region: United States
# Time range: past year
# Exposed under the 'top_loans' key of CURATED.
TOP_LOANS = [
    'student loan',
    'loan',
    'payday loans',
    'home loans',
    'loans bad credit',
    'personal loans',
    'loans online',
    'quicken loans',
    'bank loans',
    'quicken',
]
# Queries from https://trends.google.com/trends/explore?geo=US&q=symptoms
# on 1/3/2018
# Region: United States
# Time range: past year
# Exposed under the 'top_symptoms' key of CURATED.
TOP_SYMPTOMS = [
    'cancer symptoms',
    'pregnancy symptoms',
    'flu symptoms',
    'symptoms of cancer',
    'diabetes',
    'diabetes symptoms',
    # NOTE(review): 'symptomps' looks like a typo for 'symptoms'. Left as-is
    # because this string is the query itself -- confirm which spelling was
    # intended before changing it.
    'uti symptomps',
    'anxiety symptoms',
    'cold symptoms',
    'period symptoms',
]
# Medical / health-related query strings. CURATED exposes only the first 20
# entries (key 'med_sample_first_20'), so the order of this list matters.
# NOTE(review): the name suggests a Georgetown IR query sample -- confirm source.
GEORGETOWN_IR_MED = [
    "indigestion",
    "dropsy disease",
    "profuse sweating",
    "suicidal thoughts",
    "slow heart rate",
    "graves disease",
    "excessive burping",
    "how to lose weight",
    "hair loss in women causes",
    "fear of heights",
    "leg blood clot symptoms",
    "can't sleep",
    "hairloss",
    "difficulty breathing",
    "body odor",
    "salivary gland stones",
    "excessive sweating",
    "nervousness",
    "sunlight causing hives",
    "foods to avoid with acid reflux",
    "trouble swallowing when eating",
    "how to stop a nosebleed",
    "hives",
    "common cold symptoms",
    "stress incontinence",
    "sweating sickness",
    "bloated stomach",
    "blood clot",
    "sweet sweat",
    "kidney failure symptoms",
    "trouble swallowing",
    "heat stroke",
    "spontaneous abortion",
    "foods cause gout",
    "bloody stools in adults",
    "back problems",
    "indigestion symptoms",
    "shaking hands",
    "ringing ears",
    "memory loss",
    "nosebleed",
    "brown vaginal discharge",
    "memory loss in women",
    "pressure ulcers",
    "acid reflux symptoms",
    "what causes hair loss in women",
    "erectile dysfunction remedies",
    "tooth ache",
    "acid reflux",
    "double vision causes"
]
# Named query sets, keyed by category label.
# NOTE: the 'popular' entry is produced by calling
# from_trends_top_query_by_category() while this dict is being built, i.e.
# when the module is imported. Its value is a list of
# {'keyword': ..., 'category': ...} dicts, whereas every other entry is a
# plain list of query strings.
CURATED = {
    'trending': TRENDING,
    'popular': from_trends_top_query_by_category(),
    'procon_popular': PROCON_POPULAR,
    'procon_a_to_z': PROCON_A_TO_Z,
    'top_insurance': TOP_INSURANCE,
    'top_loans': TOP_LOANS,
    'top_symptoms': TOP_SYMPTOMS,
    # Only the first 20 entries of GEORGETOWN_IR_MED are exposed.
    'med_sample_first_20': GEORGETOWN_IR_MED[:20]
}