Skip to content

Commit e6765bd

Browse files
committed
Working but ugly labels in the visualization
1 parent 619f129 commit e6765bd

File tree

1 file changed

+194
-0
lines changed

1 file changed

+194
-0
lines changed

hierarchy_analysis.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
import praw
2+
import scipy as sp
3+
import numpy as np
4+
import sys
5+
import operator
6+
import time
7+
import project as p
8+
import matplotlib.pyplot as plt
9+
import scipy.cluster.hierarchy as hi
10+
11+
class histogram:
    """Sparse table mapping hashable keys to numeric frequencies.

    All arithmetic methods modify ``self.frequencies`` in place and
    return ``None``.
    """

    def __init__(self, dictionary=None):
        """Create a histogram, optionally seeded from ``dictionary``.

        The seed mapping is copied. Without the copy, every in-place
        operation below would silently rewrite the caller's dict as well.
        """
        if dictionary is None:
            self.frequencies = {}
        else:
            self.frequencies = dict(dictionary)

    def add_frequency(self, key, value):
        """Add ``value`` to the entry for ``key``, creating it if absent."""
        if key in self.frequencies:
            self.frequencies[key] += value
        else:
            self.frequencies[key] = value

    def add_by_frequencies(self, frequencies):
        """Add every entry of another histogram to this one, key by key."""
        other = frequencies.frequencies
        for key in other:
            self.add_frequency(key, other[key])

    def multiply_frequency(self, key, value):
        """Multiply the entry for ``key`` by ``value``.

        A missing key is treated as zero, so it is stored as ``0.0``.
        """
        if key in self.frequencies:
            self.frequencies[key] *= value
        else:
            self.frequencies[key] = 0.0

    def multiply_by_frequencies(self, frequencies):
        """Multiply by another histogram, over the keys of that histogram.

        Keys present only in ``self`` are left untouched.
        """
        other = frequencies.frequencies
        for key in other:
            self.multiply_frequency(key, other[key])

    def multiply_by_scalar(self, scalar):
        """Multiply every stored entry by ``scalar``."""
        # Only existing values are reassigned, so the dict does not change
        # size while it is being iterated.
        for key in self.frequencies:
            self.multiply_frequency(key, scalar)

    def divide_frequency(self, key, value):
        """Divide the entry for ``key`` by ``value`` with special-cased zeros.

        Results by case:
          * key present, entry == 0           -> 1.0 (whatever ``value`` is)
          * key present, entry != 0, value != 0 -> entry / value (as float)
          * key present, entry != 0, value == 0 -> +infinity
          * key absent, value > 0             -> 0.0
          * key absent, value <= 0            -> 1.0
        """
        # NOTE(review): a stored zero divided by a non-zero value yields 1.0,
        # while an absent key divided by a positive value yields 0.0. That
        # looks inconsistent; behaviour is preserved here -- confirm intent.
        if key not in self.frequencies:
            self.frequencies[key] = 0.0 if value > 0 else 1.0
            return

        if self.frequencies[key] == 0:
            self.frequencies[key] = 1.0
        elif value != 0:
            # Adding 0.0 forces float division under Python 2.
            self.frequencies[key] /= (0.0 + value)
        else:
            self.frequencies[key] = float('inf')

    def divide_by_frequencies(self, frequencies):
        """Divide by another histogram, over the keys of that histogram."""
        other = frequencies.frequencies
        for key in other:
            self.divide_frequency(key, other[key])
62+
63+
64+
class comment:
    """Minimal record of a comment: its author's name and its subreddit.

    Attributes:
        author_name: ``comment.author.name``, or ``''`` when the comment,
            its author, or the author's ``name`` attribute is unavailable.
        subreddit: the subreddit display name with surrounding spaces
            removed and lower-cased, or ``''`` when it is unavailable.
    """

    def __init__(self, comment):
        """Extract the author name and subreddit from ``comment``.

        ``comment`` may be ``None`` or lack either attribute; the
        corresponding field then falls back to an empty string.
        """
        author = getattr(comment, 'author', None)
        if author is not None and hasattr(author, 'name'):
            self.author_name = author.name
        else:
            self.author_name = ''

        # Guard the subreddit lookup the same way as the author lookup, so a
        # None or incomplete comment no longer raises AttributeError.
        subreddit = getattr(comment, 'subreddit', None)
        display_name = getattr(subreddit, 'display_name', None)
        if display_name is None:
            self.subreddit = ''
        else:
            self.subreddit = str(display_name.strip(' ').lower())
72+
73+
class user:
    """Namespace for per-author statistics over a list of comments."""

    @staticmethod
    def get_histogram(comments, author_name):
        """Return the share of an author's comments made in each subreddit.

        Args:
            comments: iterable of objects exposing ``author_name`` and
                ``subreddit`` attributes.
            author_name: the author whose comments are counted.

        Returns:
            A dict mapping subreddit -> fraction of the author's comments
            posted there. Empty when the author has no comments in
            ``comments`` (previously this raised ZeroDivisionError).
        """
        counts = {}
        total_comments_by_author = 0
        for entry in comments:
            if entry.author_name == author_name:
                total_comments_by_author += 1
                counts[entry.subreddit] = counts.get(entry.subreddit, 0) + 1

        if total_comments_by_author == 0:
            return {}

        scale = 1.0 / total_comments_by_author
        return {name: count * scale for name, count in counts.items()}
84+
85+
class community:
    """Namespace for per-subreddit statistics over a list of comments."""

    @staticmethod
    def get_histogram(comments, subreddit_name):
        """Return the share of a subreddit's comments written by each author.

        Args:
            comments: iterable of objects exposing ``author_name`` and
                ``subreddit`` attributes.
            subreddit_name: the subreddit whose comments are counted.

        Returns:
            A dict mapping author name -> fraction of the subreddit's
            comments written by that author. Empty when the subreddit has
            no comments in ``comments`` (previously this raised
            ZeroDivisionError).
        """
        counts = {}
        total_comments_in_subreddit = 0
        for entry in comments:
            if entry.subreddit == subreddit_name:
                total_comments_in_subreddit += 1
                counts[entry.author_name] = counts.get(entry.author_name, 0) + 1

        if total_comments_in_subreddit == 0:
            return {}

        scale = 1.0 / total_comments_in_subreddit
        return {name: count * scale for name, count in counts.items()}
96+
97+
98+
user_agent = ("Testing Reddit Functionality by /u/Reddit_Projector https://github.com/joshlemer/RedditProject")
99+
reddit = praw.Reddit(user_agent)
100+
subredditName = 'all'
101+
subreddit_object = reddit.get_subreddit(subredditName)
102+
103+
104+
x = 5
105+
y = 5
106+
z = 100
107+
comments = [comment(a) for a in subreddit_object.get_comments(limit=x)]
108+
x_comments = [comment(a) for a in subreddit_object.get_comments(limit=x)]
109+
x_subs = []
110+
i = 0
111+
for c in x_comments:
112+
print "x = ", i
113+
if c.subreddit not in x_subs:
114+
x_subs.append(c.subreddit)
115+
i += 1
116+
117+
y_comments = []
118+
i = 0
119+
for x_sub in x_subs:
120+
print "y = ", i
121+
subreddit_object = reddit.get_subreddit(x_sub)
122+
y_comments += [comment(a) for a in subreddit_object.get_comments(limit=y)]
123+
i += 1
124+
125+
z_comments = []
126+
i = 0
127+
for y_com in y_comments:
128+
print "z = ", i
129+
z_comments += [comment(a) for a in reddit.get_redditor(y_com.author_name).get_comments(limit=z)]
130+
i += 1
131+
132+
comments = list(z_comments)
133+
print "COMMENTS LENGTH: ", len(comments)
134+
135+
users = {}
136+
for comment in comments:
137+
if comment.author_name not in users:
138+
users[comment.author_name] = user.get_histogram(comments, comment.author_name)
139+
140+
#for c in comments:
141+
# print "%s\t%s" % (c.author_name, c.subreddit)
142+
143+
#print users
144+
145+
146+
subreddits = {}
147+
for comment in comments:
148+
if comment.subreddit not in subreddits:
149+
subreddits[comment.subreddit] = community.get_histogram(comments, comment.subreddit)
150+
151+
#print subreddits
152+
153+
sub_relatedness = {}
154+
for sub in subreddits:
155+
sub_histogram = histogram()
156+
for user in subreddits[sub]:
157+
user_histogram = histogram(users[user])
158+
user_histogram.multiply_by_scalar(subreddits[sub][user])
159+
160+
sub_histogram.add_by_frequencies(user_histogram)
161+
sub_relatedness[sub] = sub_histogram.frequencies
162+
163+
print sub_relatedness
164+
165+
for u in sub_relatedness:
166+
if len(sub_relatedness[u]) != 1:
167+
print u, sub_relatedness[u]
168+
169+
subreddit_names = [x for x in subreddits]
170+
print subreddit_names
171+
subreddit_rows = []
172+
for sub in subreddit_names:
173+
sub_row = []
174+
for sub_name in subreddit_names:
175+
if sub_name in sub_relatedness[sub]:
176+
sub_row.append(sub_relatedness[sub][sub_name])
177+
else:
178+
sub_row.append(float(0))
179+
subreddit_rows.append(sub_row)
180+
print subreddit_rows
181+
182+
b = sp.spatial.distance.pdist(subreddit_rows, 'euclidean')
183+
c = hi.linkage(b,method='single', metric='euclidean')
184+
hi.dendrogram(c, labels=subreddit_names)
185+
plt.show()
186+
187+
188+
189+
190+
191+
192+
193+
194+

0 commit comments

Comments
 (0)