algo-visualize2docset.py
import os, sqlite3, requests
from urllib.request import urlopen, urlretrieve
from bs4 import BeautifulSoup as bs

# docset config
docset_name = 'algorithms-visualized.docset'
output = docset_name + '/Contents/Resources/Documents/'

# source site; its entry page doubles as the docset index page
root_url = 'http://www.cs.usfca.edu/~galles/visualization/Algorithms.html'
index_page = root_url.split('/')[-1]

# create docset directory
if not os.path.exists(output):
    os.makedirs(output)

# add icon (note: this is an old-style Dropbox public-folder link, which
# Dropbox has retired, so the download may fail and the URL may need replacing)
icon = 'https://dl.dropboxusercontent.com/u/12327637/algo-visualize.png'
urlretrieve(icon, docset_name + '/icon.png')
def update_db(name, path):
    """Insert one entry into the search index unless the path is already present."""
    typ = 'Guide'
    cur.execute("SELECT rowid FROM searchIndex WHERE path = ?", (path,))
    if cur.fetchone() is None:
        cur.execute('INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?, ?, ?)',
                    (name, typ, path))
        print('DB add >> name: %s, path: %s' % (name, path))
    else:
        print('record exists')
def create_subdir(path, output):
    """Recreate the directory part of `path` under `output`; return the deepest dir."""
    currentdir = output
    parts = path.split('/')
    # everything before the file name is a folder; create each level in turn
    for part in parts[:-1]:
        currentdir = os.path.join(currentdir, part)
        if not os.path.exists(currentdir):
            os.makedirs(currentdir)
    return currentdir
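
# e.g. create_subdir('java/visualization.html', output) would create
# '<output>/java/' and return that path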
def download_page(currentdir, page_url, page):
    """Fetch a single page and save it under `currentdir`."""
    if len(page.split('/')) > 1:
        page = page.split('/')[-1]
    try:
        res = urlopen(page_url)
        with open(os.path.join(currentdir, page), 'wb') as f:
            f.write(res.read())
        print('downloaded doc: %s' % page)
    except OSError:
        print(' X failed: %s' % page_url)
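
# download_page is never called in this script: the indexed paths point at the
# live site rather than local copies. A sketch of how it could be wired up with
# create_subdir to mirror a page locally (illustrative, not part of the original flow):
#
#   rel_path = 'java/visualization.html'
#   page_url = root_url.rsplit('/', 1)[0] + '/' + rel_path
#   download_page(create_subdir(rel_path, output), page_url, rel_path)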
def add_docs():
    """Scrape the index page and register every visualization link in the db."""
    data = requests.get(root_url).text
    soup = bs(data, 'html.parser')

    # index the index page itself
    update_db('Algorithms and Data structures visualized', index_page)

    # site pages that are not visualizations and should not be indexed
    filtered = ['source.html', 'bugfeature.html', 'about.html', 'Algorithms.html',
                'faq.html', 'java/visualization.html', 'flash.html', 'contact.html']

    # extract the href and name of every internal link
    for link in soup.find_all('a'):
        name = link.text.strip()
        path = link.get('href')
        if path and not path.startswith('http') and path not in filtered:
            update_db('visualize ' + name, path)
def add_infoplist():
    """Write the Info.plist that identifies the bundle as a Dash docset."""
    name = docset_name.split('.')[0]
    info = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>CFBundleIdentifier</key>
    <string>{0}</string>
    <key>CFBundleName</key>
    <string>{0}</string>
    <key>DocSetPlatformFamily</key>
    <string>{0}</string>
    <key>isDashDocset</key>
    <true/>
    <key>isJavaScriptEnabled</key>
    <true/>
    <key>dashIndexFilePath</key>
    <string>{1}</string>
</dict>
</plist>""".format(name, index_page)
    with open(docset_name + '/Contents/Info.plist', 'w') as f:
        f.write(info)
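
# isDashDocset marks the bundle as a Dash docset; isJavaScriptEnabled is set
# because the visualizations are interactive JavaScript pages and would not
# work in the viewer without it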
if __name__ == '__main__':
    db = sqlite3.connect(docset_name + '/Contents/Resources/docSet.dsidx')
    cur = db.cursor()

    # start from a clean search index
    try:
        cur.execute('DROP TABLE searchIndex;')
    except sqlite3.OperationalError:
        pass
    cur.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);')
    cur.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')

    # build the index and the plist
    add_docs()
    add_infoplist()

    # commit and close db
    db.commit()
    db.close()
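
# to sanity-check the generated index afterwards (a minimal sketch, assuming
# the docset layout created above):
#
#   import sqlite3
#   db = sqlite3.connect('algorithms-visualized.docset/Contents/Resources/docSet.dsidx')
#   for row in db.execute('SELECT name, type, path FROM searchIndex LIMIT 5'):
#       print(row)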