-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyzer.py
118 lines (97 loc) · 4.7 KB
/
analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# -*- coding: utf-8 -*-
import codecs
import os
from unidecode import unidecode
import connections
import pandas as pd
from oscar_timeline import OSCAR_TIMELINE
COLLECTION = '#Oscars'
def get_all_collection_data():
    """Export 5-minute tweet counts, grouped by negative probability, to CSV.

    Reads every document of the ``COLLECTION`` collection in the ``tweets``
    database and keeps, per tweet, ``sentiment.p_neg`` (under the column name
    ``'eg'``) and ``created_at_datetime`` (under ``'date'``). The rows are
    indexed by ``'date'``, grouped by ``'eg'``, resampled into ``'5T'``
    (5-minute) bins and counted. The resulting table is rendered as CSV,
    passed through ``unidecode`` and written to ``teste/tweet.csv`` (relative
    to the current working directory).

    Returns:
        None. The only result is the file written to disk.
    """
    with connections.get_db_connection() as client:
        mongo_collection = client['tweets'][COLLECTION]
        # Drain the cursor while the connection context is still open.
        tweets_to_use = [
            {
                'id': tweet['_id'],
                'eg': tweet['sentiment']['p_neg'],
                'date': tweet['created_at_datetime'],
            }
            for tweet in mongo_collection.find({})
        ]

    # Only 'eg' and 'date' are requested as columns; 'date' becomes the index
    # so the frame can be resampled over time.
    tweet_frames = pd.DataFrame.from_records(
        tweets_to_use, columns=['eg', 'date'], index='date')
    tweet_frames = tweet_frames.groupby('eg').resample('5T').count()

    csv_text = tweet_frames.to_csv(encoding='utf-8')
    # The context manager closes the file even if transliteration or the
    # write fails; the text is written in one call instead of per character.
    with codecs.open(os.path.abspath('teste/tweet.csv'), 'w',
                     encoding='utf-8') as tweet_file:
        tweet_file.write(unidecode(csv_text))
def get_location_collection_data():
    """Export sentiment probabilities of tweets that carry a user location.

    Reads every document of the ``COLLECTION`` collection in the ``tweets``
    database and keeps only tweets whose ``user.location`` is truthy. For each
    kept tweet it records ``_id``, ``sentiment.p_pos``, ``sentiment.p_neg``
    and the location, builds a DataFrame from those records, renders it as
    CSV, passes the text through ``unidecode`` and writes it to
    ``teste/tweet.csv`` (relative to the current working directory).

    Returns:
        None. The only result is the file written to disk.
    """
    with connections.get_db_connection() as client:
        mongo_collection = client['tweets'][COLLECTION]
        # Drain the cursor while the connection context is still open.
        # NOTE(review): the 'location ' key really has a trailing space, so
        # the CSV header carries it too. Kept as-is so existing consumers of
        # the file are not broken -- confirm whether it is intentional.
        tweets_to_use = [
            {
                'id': tweet['_id'],
                'prob_pos': tweet['sentiment']['p_pos'],
                'eg': tweet['sentiment']['p_neg'],
                'location ': tweet['user']['location'],
            }
            for tweet in mongo_collection.find({})
            if tweet['user']['location']
        ]

    tweet_frames = pd.DataFrame.from_records(tweets_to_use)
    csv_text = tweet_frames.to_csv(encoding='utf-8')

    # The original never closed this file; the context manager guarantees the
    # buffered data is flushed and the handle released.
    with codecs.open(os.path.abspath('teste/tweet.csv'), 'w',
                     encoding='utf-8') as tweet_file:
        tweet_file.write(unidecode(csv_text))
def get_lang_collection_data():
    """Export each tweet's sentiment classification and language to CSV.

    Reads every document of the ``COLLECTION`` collection in the ``tweets``
    database and records ``_id``, ``sentiment.classification`` and ``lang``
    for each one. The records are turned into a DataFrame, rendered as CSV,
    passed through ``unidecode`` and written to ``teste/tweet.csv`` (relative
    to the current working directory).

    Returns:
        None. The only result is the file written to disk.
    """
    with connections.get_db_connection() as client:
        mongo_collection = client['tweets'][COLLECTION]
        # Drain the cursor while the connection context is still open.
        tweets_to_use = [
            {
                'id': tweet['_id'],
                'sentiment': tweet['sentiment']['classification'],
                'lang': tweet['lang'],
            }
            for tweet in mongo_collection.find({})
        ]

    tweet_frames = pd.DataFrame.from_records(tweets_to_use)
    csv_text = tweet_frames.to_csv(encoding='utf-8')

    # The original never closed this file; the context manager guarantees the
    # buffered data is flushed and the handle released.
    with codecs.open(os.path.abspath('teste/tweet.csv'), 'w',
                     encoding='utf-8') as tweet_file:
        tweet_file.write(unidecode(csv_text))
def get_all_collection_data_freqdist():
    """Print tweet data collected per sentiment class and 5-minute bin.

    Reads every document of the ``COLLECTION`` collection in the ``tweets``
    database and keeps ``sentiment.classification``, ``created_at_datetime``
    and ``text`` for each tweet. The rows are indexed by ``'date'``, grouped
    by ``'sentiment'``, resampled into ``'5T'`` (5-minute) bins and aggregated
    with ``junta_tudo``. Each item obtained by iterating over the aggregated
    result is then printed to standard output.

    Returns:
        None.
    """
    with connections.get_db_connection() as client:
        mongo_collection = client['tweets'][COLLECTION]
        # Drain the cursor while the connection context is still open.
        tweets_to_use = [
            {
                'id': tweet['_id'],
                'sentiment': tweet['sentiment']['classification'],
                'date': tweet['created_at_datetime'],
                'text': tweet['text'],
            }
            for tweet in mongo_collection.find({})
        ]

    # Only 'sentiment', 'date' and 'text' are requested as columns; 'date'
    # becomes the index so the frame can be resampled over time.
    tweet_frames = pd.DataFrame.from_records(
        tweets_to_use, columns=['sentiment', 'date', 'text'], index='date')
    tweet_frames = tweet_frames.groupby('sentiment').resample('5T').apply(junta_tudo)

    for line in tweet_frames:
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(line)
def junta_tudo(algo):
    """Return every item of *algo* collected into a new list.

    Passed as the aggregation callback to ``resample(...).apply`` in
    ``get_all_collection_data_freqdist``.

    Args:
        algo: Any iterable; it is consumed once, in iteration order.

    Returns:
        A new list holding the items of ``algo`` in the order they were
        yielded (empty if ``algo`` yields nothing).
    """
    return list(algo)
# Script entry point: when the module is executed directly (not imported),
# run the location export, which writes teste/tweet.csv.
if __name__ == '__main__':
    get_location_collection_data()