AnalyzerBackend.py
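"""DeepAnalyzer backend: pull hot posts and top comments from a subreddit via
PRAW, score keywords and entities with IBM Watson Natural Language
Understanding, and chart per-keyword happy/sad emotion in a Dash bar graph.

Usage:
    python AnalyzerBackend.py <subreddit> <n_posts> <n_comments_per_post> <n_keywords>
"""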
import sys

import praw
import dash
import dash_core_components as dcc
import dash_html_components as html
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 import Features, KeywordsOptions, EntitiesOptions
def get_text_samples(subname, n_posts, n_comments_per_post):
    """Extract text samples (titles, self-texts, top comments) from a subreddit."""
    # Credentials redacted: supply your own, or load them from praw.ini /
    # environment variables rather than hardcoding secrets in source.
    reddit = praw.Reddit(client_id='YOUR_CLIENT_ID',
                         client_secret='YOUR_CLIENT_SECRET',
                         username='YOUR_USERNAME',
                         password='YOUR_PASSWORD',
                         user_agent='Post_Extraction')
    subreddit = reddit.subreddit(subname)
    samples = []
    print('Format: Title | Score')
    print('________________________________________________')
    for submission in subreddit.hot(limit=n_posts):
        if submission.title:
            print(submission.title, '|', submission.score)
            samples.append(submission.title)
        if submission.selftext:  # selftext can be empty
            samples.append(submission.selftext)
        # The sort must be set before the comment forest is fetched.
        submission.comment_sort = 'top'
        submission.comments.replace_more(limit=0)  # drop "More comments..." placeholders
        n_taken = 0  # comments collected for this submission
        for comment in submission.comments:
            if comment.body:
                samples.append(comment.body)
                n_taken += 1
            if n_taken >= n_comments_per_post:
                break
    print('________________________________________________')
    return samples
def get_nlu_data(samples):
    """Query IBM NLU for keyword and entity data for each sample."""
    data = {}
    # Credentials redacted: supply your own IBM Cloud NLU credentials.
    nlu = NaturalLanguageUnderstandingV1(
        version='2018-03-16',
        username='YOUR_NLU_USERNAME',
        password='YOUR_NLU_PASSWORD')
    for s in samples:
        response = nlu.analyze(
            text=s,
            language='en',
            features=Features(
                keywords=KeywordsOptions(emotion=True, limit=5),
                entities=EntitiesOptions(emotion=True, limit=5)))
        data[s] = {'key': {}, 'ent': {}}
        # analyze() returns a DetailedResponse, not a dict, so index into .result.
        for kwd_data in response.result['keywords']:
            if 'relevance' not in kwd_data or 'emotion' not in kwd_data:
                continue  # skip keywords missing relevance or emotion data
            data[s]['key'][kwd_data['text']] = kwd_data
        for ent_data in response.result['entities']:
            if 'relevance' not in ent_data or 'emotion' not in ent_data:
                continue  # likewise for entities
            data[s]['ent'][ent_data['text']] = ent_data
    return data
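# For reference, get_nlu_data returns a structure shaped like this (values abridged):
# {
#     'sample text': {
#         'key': {'keyword': {'text': 'keyword', 'relevance': 0.91,
#                             'emotion': {'joy': 0.6, 'sadness': 0.1, ...}}},
#         'ent': { ... same layout for entities ... },
#     },
#     ...
# }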
def get_common_words(nlu_data, n, ty):
    """Return the n items of type ty ('key' or 'ent') with the highest total relevance."""
    relevance = {}
    for sample in nlu_data:
        for keyword in nlu_data[sample][ty]:
            if keyword not in relevance:
                relevance[keyword] = 0
            relevance[keyword] += nlu_data[sample][ty][keyword]['relevance']
    return sorted(relevance.keys(), key=lambda k: relevance[k], reverse=True)[:n]
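# Example: a keyword seen in two samples with relevance 0.9 and 0.7 totals 1.6
# and ranks above one whose relevances sum to 1.2.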
def get_emotion_information(nlu_data, keywords, ty):
    """Compute average happy/unhappy scores for each keyword across all samples."""
    emotion = {}  # keyword -> [happy factor, unhappy factor]
    for kw in keywords:
        emotion[kw] = [0.0, 0.0]
        count = 0  # number of samples incorporated
        for sample in nlu_data:
            if kw not in nlu_data[sample][ty]:
                continue
            count += 1
            # Weight joy by 4: NLU reports four negative emotions but only one positive.
            emotion[kw][0] += nlu_data[sample][ty][kw]['emotion']['joy'] * 4.0
            emotion[kw][1] += sum(nlu_data[sample][ty][kw]['emotion'][a]
                                  for a in ['sadness', 'fear', 'disgust', 'anger'])
        # Average over the samples that actually contained the keyword.
        if count:
            emotion[kw][0] /= count
            emotion[kw][1] /= count
    return emotion
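# Example: for one sample with joy=0.5 and sadness=0.2, fear=0.1, disgust=0.0,
# anger=0.1, the contribution is happy = 0.5 * 4.0 = 2.0 and unhappy = 0.4;
# the 4x weight keeps the single positive emotion on the same scale as the
# four negative ones.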
def graph_app(common, emotions):
    """Serve a Dash bar chart of happy vs. sad scores for the common keywords."""
    app = dash.Dash()
    # Sad scores are negated so they plot below the axis, opposite the happy bars.
    happy = [emotions[kw][0] for kw in common]
    sad = [-emotions[kw][1] for kw in common]
    app.layout = html.Div(children=[
        html.H1(children='DeepAnalyzer'),
        html.Div(children='A Deep Analyzer of subreddit trends'),
        dcc.Graph(
            id='example-graph',
            figure={
                'data': [
                    {'x': common, 'y': happy, 'type': 'bar', 'name': 'Happy'},
                    {'x': common, 'y': sad, 'type': 'bar', 'name': 'Sad'},
                ],
                'layout': {
                    'title': 'Keyword Analysis',
                    'barmode': 'relative',
                },
            },
        ),
    ])
    app.run_server(debug=True)
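# With barmode 'relative', Plotly stacks positive bars above zero and negative
# bars below it, which is why the sad scores are negated above. run_server
# serves the app locally (http://127.0.0.1:8050 by default).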
def main():
    if len(sys.argv) != 5:
        sys.exit('Usage: {} <subreddit> <n_posts> <n_comments_per_post> <n_keywords>'.format(sys.argv[0]))
    SUBREDDIT = sys.argv[1]
    N_POSTS = int(sys.argv[2])
    N_COMMENTS_PER_POST = int(sys.argv[3])
    N_KEYWORDS_WANTED = int(sys.argv[4])
    samples = get_text_samples(SUBREDDIT, N_POSTS, N_COMMENTS_PER_POST)
    data = get_nlu_data(samples)
    # Rank keywords by relevance, then compute their emotion scores.
    common = get_common_words(data, N_KEYWORDS_WANTED, 'key')
    emotions = get_emotion_information(data, common, 'key')
    # TSV summary of the results on stdout.
    sys.stdout.write('keyword\thappy_score\tunhappy_score\n')
    for keyword in common:
        sys.stdout.write('{}\t{}\t{}\n'.format(keyword, emotions[keyword][0], emotions[keyword][1]))
    graph_app(common, emotions)
if __name__ == '__main__':
    main()
# TODO: add progress output so long extraction and analysis runs show what is
#       happening.
# TODO: go back to the JSON produced by the analysis step, identify the samples
#       that contain the generated keywords, and produce a sentiment reading
#       for those posts, reported alongside their upvote scores.
# TODO: plot the printed data with matplotlib to produce polished graphics for
#       the advertising reps; a sketch follows below.
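# A minimal matplotlib sketch for the plotting TODO above, assuming the same
# `common` / `emotions` structures produced by get_common_words and
# get_emotion_information. It is an alternative view of the data, not wired
# into main().
def plot_emotions(common, emotions):
    import matplotlib.pyplot as plt
    happy = [emotions[kw][0] for kw in common]
    sad = [-emotions[kw][1] for kw in common]  # negated, as in graph_app
    plt.bar(common, happy, label='Happy')
    plt.bar(common, sad, label='Sad')
    plt.axhline(0, color='black', linewidth=0.5)
    plt.legend()
    plt.title('Keyword Analysis')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()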