Skip to content

Commit ca291d2

Browse files
committed
check keyword function implemented
1 parent 34a7c08 commit ca291d2

File tree

1 file changed

+70
-29
lines changed

1 file changed

+70
-29
lines changed

FBScraper.py

Lines changed: 70 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from urllib.parse import urlencode
1010
import pandas as pd
1111
from urllib.request import urlopen
12+
import numpy as np
13+
import math
1214

1315
# The location of the input file
1416
filename_root = 'C:/Users/Synergos/OneDrive/Documents/SusMon_Facebook_API'
@@ -65,8 +67,8 @@ def getSustainabilityKeywords():
6567
keywordList = []
6668

6769
for i in range(1, m_row + 1):
68-
cell_obj = kw_sheet.cell(row=i, column=1)
69-
keywordList.append(cell_obj.value)
70+
cell_obj = kw_sheet.cell(row=i, column=1)
71+
keywordList.append(cell_obj.value)
7072

7173
return keywordList
7274

@@ -126,43 +128,81 @@ def connectToFacebook():
126128
data = facebook_connection.read().decode('utf8')
127129
json_object = json.loads(data)
128130
posts = json_object["data"]
129-
# df = pd.DataFrame(posts)
130-
df = posts
131-
132-
df['Angry'] = df['Angry'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
133-
df['Angry'] = df['Angry'].str.replace(',(.*?)}}','')
134-
df['Haha'] = df['Haha'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
135-
df['Haha'] = df['Haha'].str.replace('}}','')
136-
df['Love'] = df['Love'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
137-
df['Love'] = df['Love'].str.replace('}}','')
138-
df['Sad'] = df['Sad'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
139-
df['Sad'] = df['Sad'].str.replace(',(.*?)}}','')
140-
df['Wow'] = df['Wow'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
141-
df['Wow'] = df['Wow'].str.replace('}}','')
142-
df['comments'] = df['comments'].astype(str).str.replace('{\'data\':(.*?)count\': ','')
143-
df['comments'] = df['comments'].str.replace(',(.*?)}}','')
144-
df['likes'] = df['likes'].astype(str).str.replace('{\'(.*?)count\':','')
145-
df['likes'] = df['likes'].str.replace(',(.*?)}}','')
146-
df['shares'] = df['shares'].astype(str).str.replace('{\'count\': ','')
147-
df['shares'] = df['shares'].str.replace('}','')
148-
df['date'], df['time'] = df['created_time'].astype(str).str.split('T', 1).str
149-
df['time'] = df['time'].str.replace('[+]0000','')
131+
df = pd.DataFrame(posts)
132+
133+
df['Angry'] = df['Angry'].astype(str).str.replace(
134+
'{\'data\':(.*?)count\': ', '')
135+
df['Angry'] = df['Angry'].str.replace(',(.*?)}}', '')
136+
df['Haha'] = df['Haha'].astype(str).str.replace(
137+
'{\'data\':(.*?)count\': ', '')
138+
df['Haha'] = df['Haha'].str.replace('}}', '')
139+
df['Love'] = df['Love'].astype(str).str.replace(
140+
'{\'data\':(.*?)count\': ', '')
141+
df['Love'] = df['Love'].str.replace('}}', '')
142+
df['Sad'] = df['Sad'].astype(str).str.replace(
143+
'{\'data\':(.*?)count\': ', '')
144+
df['Sad'] = df['Sad'].str.replace(',(.*?)}}', '')
145+
df['Wow'] = df['Wow'].astype(str).str.replace(
146+
'{\'data\':(.*?)count\': ', '')
147+
df['Wow'] = df['Wow'].str.replace('}}', '')
148+
df['comments'] = df['comments'].astype(
149+
str).str.replace('{\'data\':(.*?)count\': ', '')
150+
df['comments'] = df['comments'].str.replace(',(.*?)}}', '')
151+
df['likes'] = df['likes'].astype(
152+
str).str.replace('{\'(.*?)count\':', '')
153+
df['likes'] = df['likes'].str.replace(',(.*?)}}', '')
154+
df['shares'] = df['shares'].astype(str).str.replace('{\'count\': ', '')
155+
df['shares'] = df['shares'].str.replace('}', '')
156+
df['date'], df['time'] = df['created_time'].astype(
157+
str).str.split('T', 1).str
158+
df['time'] = df['time'].str.replace('[+]0000', '')
150159
df.to_csv("Facebook Posts.csv")
151160
# print(df)
152161

153162
except Exception as ex:
154-
print (ex)
163+
print(ex)
155164

156165
return df
157166

167+
# Function to scan each post message for sustainability keywords
168+
169+
158170
def checkPostForKeywords():
159171

160-
facebookPosts = connectToFacebook()
161-
print(json.dumps(facebookPosts))
162-
keywords = getSustainabilityKeywords()
172+
keywordsList = getSustainabilityKeywords()
163173
# print(keywords)
164-
165-
174+
175+
facebookPosts = connectToFacebook()
176+
# print(facebookPosts)
177+
178+
postMessages = dict()
179+
postMessages['message'] = facebookPosts['message']
180+
181+
# Get Total length of keyword list
182+
totalNumOfKeywords = len(keywordsList)
183+
# print(totalNumOfKeywords)
184+
185+
for individualMsg in postMessages.get("message"):
186+
187+
# Check if message is NaN
188+
if pd.isnull(individualMsg):
189+
# print("I am Nan")
190+
continue
191+
192+
tw_txtu = individualMsg.encode('utf-8')
193+
print(tw_txtu)
194+
195+
term_count = 0
196+
while term_count < totalNumOfKeywords:
197+
198+
# loop through our relevant terms
199+
termu = keywordsList[term_count].encode('utf-8')
200+
if termu in tw_txtu:
201+
print("I contain a relevant Keyword")
202+
break
203+
else:
204+
term_count += 1
205+
print("I do not contain the Keyword")
166206

167207

168208
def main():
@@ -176,5 +216,6 @@ def main():
176216
# connectToFacebook()
177217
checkPostForKeywords()
178218

219+
179220
if __name__ == '__main__':
180221
main()

0 commit comments

Comments
 (0)