-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrends.py
38 lines (29 loc) · 1.26 KB
/
trends.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import simplejson
import twitter
import pickle, re, os, urllib
# code from http://zenmachine.wordpress.com/2009/08/19/data-mining-harvesting-from-twitter/
# Harvest links from tweets about the currently trending Twitter topics.
#
# For every current trend the script runs a search, prints the first URL match
# found in each resulting tweet, and records the search's ``max_id`` so that
# the next run only asks for tweets newer than the ones already seen.

# Matches one or more consecutive "http://host/path" tokens, each terminated
# by whitespace or end of string.  Written as a raw string so "\s" reaches the
# regex engine as-is instead of relying on an unrecognised string escape.
# The pattern has two groups, so findall() yields (outer, terminator) tuples.
detect_url_pattern = re.compile(r'(http://.+?/.+?(\s|$))+', re.I)

# Pickle file mapping each trend query to the max_id of its last search.
filename = "last_topic_ids.db"

# NOTE(review): unpickling can execute arbitrary code from a crafted file;
# this is only safe as long as the state file is written by this script alone.
if os.path.exists(filename):
    # Read-only binary mode is enough here, and the context manager closes
    # the handle deterministically.
    with open(filename, 'rb') as state_file:
        last_topic_ids = pickle.load(state_file)
else:
    # First run: no search has been recorded yet.
    last_topic_ids = {}

api = twitter.Api()
# _FetchUrl is an underscore-prefixed (non-public) helper of the Api object;
# it is used here to fetch the raw JSON body of an arbitrary URL.
trends_current = simplejson.loads(api._FetchUrl("http://search.twitter.com/trends/current.json"))
c = trends_current["trends"]

# "trends" is a dict whose values are lists of trend entries; only the first
# value is processed (presumably there is a single timestamp key -- confirm
# against the API).  An empty dict now means "nothing to do", not IndexError.
trend_batches = list(c.values())
current_trends = trend_batches[0] if trend_batches else []

for trend in current_trends:
    query = trend['query']
    if query not in last_topic_ids:
        # Never searched before: fetch whatever the search API returns.
        url = "http://search.twitter.com/search.json?q=%s" % (urllib.quote_plus(query))
    else:
        # Seen before: restrict the search to tweets newer than the last run.
        url = "http://search.twitter.com/search.json?q=%s&since_id=%s" % (urllib.quote_plus(query), last_topic_ids[query])
    print("--------------------------------------")
    print("%s: %s" % (trend['name'], url))
    statuses = simplejson.loads(api._FetchUrl(url))
    for status in statuses['results']:
        urls = detect_url_pattern.findall(status['text'])
        if urls:
            # Only the first match per tweet is reported; it is printed as
            # the (outer group, terminator) tuple returned by findall().
            print(urls[0])
    # Remember how far this search got so the next run can resume from it.
    last_topic_ids[query] = statuses['max_id']
    print("--------------------------------------")

print(last_topic_ids)
# Persist the updated state; the context manager guarantees the pickle is
# flushed and the handle closed before the script exits.
with open(filename, 'wb') as state_file:
    pickle.dump(last_topic_ids, state_file)