# main.py
# Forked from Fossj117/opinion-mining.
import json
import time
import pandas as pd
from pymongo import MongoClient
from classes.business import Business
def get_reviews_for_business(bus_id, df):
    """
    INPUT: business id, pandas DataFrame of reviews
    OUTPUT: pandas DataFrame

    Select the rows of df whose business_id column equals bus_id.
    Every column of df is kept; an id with no matching rows yields
    an empty DataFrame.
    """
    is_target_business = df.business_id == bus_id
    return df.loc[is_target_business]
def read_data(path='./raw_data/yelp_data/processed.csv'):
    """
    INPUT: path (optional) -- CSV location or file-like object, handed
           unchanged to pandas.read_csv; defaults to the processed
           Yelp data file this script has always read
    OUTPUT: pandas data frame parsed from the CSV
    """
    return pd.read_csv(path)
def main():
    """
    INPUT: None
    OUTPUT: None

    Load the review data, build an aspect-based summary for each
    business id, and insert each summary into the `summaries`
    collection of the `yelptest2` Mongo database, printing progress
    and per-business timing along the way.
    """
    client = MongoClient()
    try:
        db = client.yelptest2
        summaries_coll = db.summaries

        # A parenthesised single-argument print behaves the same under the
        # Python 2 print statement and the Python 3 print function.
        print("Loading data...")
        df = read_data()

        # NOTE(review): the first 21 business ids are skipped -- presumably
        # they were handled by an earlier run; confirm before changing.
        bus_ids = df.business_id.unique()[21:]

        for bus_id in bus_ids:
            print("Working on biz_id %s" % bus_id)
            start = time.time()

            biz = Business(get_reviews_for_business(bus_id, df))
            summary = biz.aspect_based_summary()

            # NOTE(review): Collection.insert is deprecated in PyMongo 3 and
            # removed in PyMongo 4; move to insert_one once the minimum
            # supported PyMongo version is confirmed.
            summaries_coll.insert(summary)
            print("Inserted summary for %s into Mongo" % biz.business_name)

            elapsed = time.time() - start
            # %d truncates the elapsed seconds to a whole number.
            print("Time elapsed: %d" % elapsed)
    finally:
        # Release the Mongo connection even if a business fails part-way.
        client.close()
# Run the summarisation pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()