-
Notifications
You must be signed in to change notification settings - Fork 1
/
scrape_group_posts.py
70 lines (61 loc) · 1.89 KB
/
scrape_group_posts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import requests
import json
from wordcloud import WordCloud
def add_counters(master, new):
    """Merge the counts in ``new`` into ``master`` in place.

    ``new`` may be anything ``dict()`` accepts (a mapping or an iterable of
    ``(key, count)`` pairs). A key that ``master`` does not have yet is
    inserted with its value from ``new``; a key that is already present has
    the new value added to it with ``+=``. Nothing is returned.
    """
    for word, count in dict(new).items():
        if word in master:
            master[word] += count
        else:
            master[word] = count
def update_counter(wc, s, ctr):
    """Run ``s`` through ``wc.process_text`` and fold the result into ``ctr``.

    Whatever ``wc.process_text(s)`` returns is merged into ``ctr`` with
    ``add_counters``; ``ctr`` is mutated in place and also returned.
    """
    frequencies = wc.process_text(s)
    add_counters(ctr, frequencies)
    return ctr
def do_comments(id, max_pages=1):
    """Fetch the comments on a Graph API object as newline-terminated text.

    Reads the access token from the ``accesskey`` file in the current working
    directory, requests ``/v2.8/<id>/comments`` and concatenates the
    ``message`` field of every returned comment, each followed by a newline.

    Args:
        id: Graph API object id (a string) whose comments are fetched. The
            name shadows the builtin but is kept for existing callers.
        max_pages: Maximum number of result pages to request. The default of
            1 keeps the previous behaviour (first page only); pass ``None``
            to keep following the ``after`` cursor for as long as the
            response advertises a ``next`` page.

    Returns:
        The collected comment messages as one string. It is ``''`` when the
        response has no ``data`` entry or no comment carries a ``message``.
    """
    # Context manager so the token file handle is closed; strip() drops a
    # trailing newline that would otherwise end up inside the request URL.
    with open('accesskey', 'r') as key_file:
        access_token = key_file.read().strip()

    url = "https://graph.facebook.com/v2.8/" + id + "/comments"
    # Let requests build (and URL-encode) the query string.
    query = {'access_token': access_token}

    messages = []
    fetched = 0
    while max_pages is None or fetched < max_pages:
        response = requests.get(url, params=query)
        fetched += 1
        payload = json.loads(response.text)

        try:
            comments = payload['data']
        except KeyError:
            # No 'data' entry (presumably an error payload): return what has
            # been collected so far, as the original did.
            break

        for comment in comments:
            try:
                messages.append(comment['message'] + '\n')
            except KeyError:
                # Comments without a 'message' field are skipped.
                pass

        # Only prepare another request when the API offers a further page;
        # the previous code issued that request unconditionally and then
        # discarded the response.
        paging = payload.get('paging', {})
        after = paging.get('cursors', {}).get('after')
        if 'next' not in paging or not after:
            break
        query['after'] = after

    return ''.join(messages)
# Script body: walk up to `pages` pages of a group's feed, count the words of
# every post (plus its comments) and render the totals as a word cloud image.

# The access token and the group id are read from files in the working
# directory. Context managers close the handles; strip() removes a trailing
# newline that would otherwise be embedded in the request URL.
with open('accesskey', 'r') as key_file:
    accesskey = key_file.read().strip()
with open('groupid', 'r') as group_file:
    groupid = group_file.read().strip()

_url = "https://graph.facebook.com/v2.8/__/feed?access_token=_at_"
_url = _url.replace('__', groupid)
_url = _url.replace('_at_', accesskey)

ctr = {}  # running word -> count totals across all fetched posts
wc = WordCloud(height=400, width=800)
pages = 10  # upper bound on the number of feed pages to fetch

while pages:
    data = json.loads(requests.get(_url).text)
    for post in data['data']:
        try:
            # post['message'] is evaluated first, so a post without a
            # message is skipped before its comments are requested.
            update_counter(wc, post['message'] + '\n' + do_comments(post['id']), ctr)
        except KeyError:
            pass
    try:
        # The API supplies the full URL of the next page; when it is absent
        # there is nothing more to fetch.
        _url = data['paging']['next']
    except KeyError:
        break
    pages -= 1
    print("Next page.", pages, "more pages to go.")

# generate_from_frequencies is documented to take a dict of word -> frequency,
# so pass the dict itself rather than a dict_items view.
# NOTE(review): wordcloud releases before 1.3 expected a list of pairs here -
# confirm the installed version.
wc.generate_from_frequencies(ctr).to_file('out.png')