forked from codelucas/shorten.tv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbg.py
executable file
·72 lines (57 loc) · 2.1 KB
/
bg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python2.7
"""
Here is shorten.tv's main background task to re-load
and cache popular youtube videos so users have less
wait time when using the webapp.
"""
import requests
import string
import backend
import urllib
# Single-letter seed terms: 'a', 'b', 'c' ... 'z'.
# string.ascii_lowercase is used instead of string.lowercase because the
# latter is locale-dependent (it may contain more than a-z) and does not
# exist outside Python 2.
letters = list(string.ascii_lowercase)

# Well-known names used as additional seed terms.
popular = [
    "Rihanna", "Usher", "Katy Perry", "Eminem", "Shakira",
    "Taylor Swift", "Akon", "Lady Gaga", "Paramore", "Jay Z",
    "Led Zepplin", "Guns N Roses", "Aerosmith", "Borat",
    "Fallout Boy", "Blink 182", "Justin Bieber", "Drake",
]

# Every seed term the background task iterates over.
searches = letters + popular

# Maximum number of videos requested per query; kept as a string because it
# is concatenated directly into the request URL.
numb_thumbs = "5"

# NOTE(review): presumably the number of autocomplete suggestions kept per
# seed term -- verify against where it is used.
numb_queries = 5
def encodeURIComponent(input_str):
    """
    Python equivalent of javascript's encodeURIComponent.

    The value is converted to UTF-8 bytes and percent-encoded; besides the
    characters the quoting function never escapes, only ``~ ( ) * ! . '``
    are left untouched (so ``/``, ``&``, ``=``, ``?`` and spaces are all
    escaped).

    input_str -- text, a byte string, or any object convertible to text.
                 Byte strings are treated as already UTF-8 encoded and are
                 quoted as-is, so non-ASCII byte strings no longer raise
                 UnicodeDecodeError.

    Returns the percent-encoded string.
    """
    try:
        quote = urllib.quote  # Python 2 location
    except AttributeError:
        # Same function, relocated in Python 3.
        from urllib.parse import quote

    if isinstance(input_str, bytes):
        # Already encoded: decoding it through the ASCII default codec (what
        # unicode(...) would do on Python 2) fails for any non-ASCII byte.
        encoded = input_str
    else:
        text_type = type(u'')  # unicode on Python 2, str on Python 3
        encoded = text_type(input_str).encode('utf-8')

    return quote(encoded, safe='~()*!.\'')
def top_query(term):
    """
    Retrieves top google autocompletion api query

    Sends ``term`` to the suggestqueries.google.com endpoint (YouTube data
    source) and returns the first suggestion from the JSON response.

    term -- seed search term; it is URL-encoded before being sent.

    Returns the top suggestion, or ``term`` itself when the service returns
    no suggestions (previously this case raised IndexError).

    Raises a ``requests`` exception when the request times out or the
    server answers with an HTTP error status.
    """
    url = "http://suggestqueries.google.com/complete/search?" + \
        "hl=en&ds=yt&client=youtube&json=t&q=" + \
        encodeURIComponent(term) + "&cp=1"

    # A timeout keeps this unattended job from hanging forever on a stalled
    # connection; raise_for_status surfaces HTTP errors before JSON parsing.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    results = response.json()

    # results[0] is printed as the looked-up term and results[1] is sliced
    # as the list of suggestions.
    queries = results[1][:numb_queries]

    # Single pre-formatted argument: prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print("Autocomplete results for %s are %s" % (results[0], queries))

    if not queries:
        # No suggestions: fall back to the raw seed term rather than crash.
        return term
    return queries[0]  # top query
def youtube_top_five(query):
    """
    Retrieves the top YouTube video ids for a google autocomplete query.

    query -- search text; it is URL-encoded before being sent.

    Returns a list with the "id" of each item in the response, at most
    ``numb_thumbs`` entries (that value is sent as ``max-results``). An
    empty list is returned when the response carries no "data"/"items"
    section (previously this raised KeyError).

    Raises a ``requests`` exception when the request times out or the
    server answers with an HTTP error status.

    NOTE(review): gdata.youtube.com is the legacy GData (v2) API, which
    Google has retired -- confirm and migrate to the YouTube Data API v3.
    """
    url = "http://gdata.youtube.com/feeds/api/videos?q=" + \
        encodeURIComponent(query) + "&format=5&max-results=" + \
        numb_thumbs + "&v=2&alt=jsonc"

    # A timeout keeps this unattended job from hanging forever on a stalled
    # connection; raise_for_status surfaces HTTP errors before JSON parsing.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    resp = response.json()

    # Tolerate a payload without results instead of failing on a missing key.
    items = resp.get("data", {}).get("items", [])
    return [video["id"] for video in items]
if __name__ == '__main__':
    # Cache-warming driver: for every seed term, resolve its top autocomplete
    # query, look up the matching video ids, compute each video's
    # summarization data via backend.check_youtube and store it in redis
    # under the video id with an expiry of backend.HOTCLIP_CACHE_TIME.
    #
    # A failure for one term or one video is reported and skipped so that it
    # no longer aborts the rest of the run; the process still exits with a
    # non-zero status if anything failed.

    # Imported here because only the script entry point needs them.
    import sys
    import traceback

    failures = 0
    for search in searches:
        try:
            query = top_query(search)
            ids = youtube_top_five(query)
        except Exception:
            # Top-level boundary of a batch job: report and move on.
            traceback.print_exc()
            failures += 1
            continue

        for yt_id in ids:
            try:
                clips, duration = backend.check_youtube(yt_id)
                yt_dat = {'hotclips': clips, 'duration': duration}
                # NOTE(review): redis-py clients disagree on setex argument
                # order ((name, value, time) vs (name, time, value)) --
                # confirm which client backend.redis is.
                # NOTE(review): yt_dat is handed over as a plain dict; verify
                # the reader in backend expects that stored representation.
                backend.redis.setex(yt_id, yt_dat, backend.HOTCLIP_CACHE_TIME)
            except Exception:
                traceback.print_exc()
                failures += 1
                continue

            # Single pre-formatted argument: prints the same text whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print('Summarization data cached for id %s ~~~~ hotclips: %s '
                  'duration: %s' % (yt_id, clips, duration))

    if failures:
        sys.exit(1)