Skip to content

Commit b9b7592

Browse files
authored
Merge pull request #17 from dan-and/courses_support
Adding support for integrated courses
2 parents bc23eae + f692408 commit b9b7592

File tree

2 files changed

+81
-5
lines changed

2 files changed

+81
-5
lines changed

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@ Automatically download all your eBooks and videos. (See: [PacktPub Free Daily Bo
1212
##### Example: Download videos, their cover image, and accompanying source code
1313
python downloader.py -e hello@world.com -p p@ssw0rd -d ~/Desktop/packt -v video,cover,code
1414

15+
##### Example: Download Integrated Courses (Interactive-Ebooks), their cover image, and accompanying source code
16+
python downloader.py -e hello@world.com -p p@ssw0rd -d ~/Desktop/packt -c course,cover,code
17+
1518
##### Example: Download everything
16-
python downloader.py -e hello@world.com -p p@ssw0rd -d ~/Desktop/packt -b pdf,epub,mobi,cover,code,info -v video,cover,code
19+
python downloader.py -e hello@world.com -p p@ssw0rd -d ~/Desktop/packt -b pdf,epub,mobi,cover,code,info -v video,cover,code -c course,cover,code
1720

1821

1922
## Commandline Options
@@ -22,6 +25,7 @@ Automatically download all your eBooks and videos. (See: [PacktPub Free Daily Bo
2225
- *-d*, *--directory* = Directory to download into. Default is "packtpub_media/" in the current directory
2326
- *-v*, *--videos* = Assets to download. Options are: *video,cover,code*
2427
- *-b*, *--books* = Assets to download. Options are: *pdf,mobi,epub,cover,code,info*
28+
- *-c*, *--courses* = Assets to download. Options are: *course,cover,code*
2529

2630
**Video Assets**
2731

@@ -38,6 +42,12 @@ Automatically download all your eBooks and videos. (See: [PacktPub Free Daily Bo
3842
- *code*: Accompanying source code
3943
- *info*: Creates a JSON file with the title, ISBN, # of pages, and description. (note: it slows downloads)
4044

45+
**Course Assets**
46+
47+
- *course*: The interactive ebook (with integrated videos etc.)
48+
- *cover*: Cover image
49+
- *code*: Accompanying source code
50+
4151

4252
## Dependencies:
4353

downloader.py

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def download_to_file(filepath, url, session, headers, prefix_url=True):
5454

5555
# creates a json file with info
5656
def save_book_details(book, title, directory, session, headers):
57-
57+
5858
# fetch the product page
5959
product_url = book.xpath(".//div[contains(@class,'product-thumbnail')]//a/@href")
6060
product_page = session.get("https://www.packtpub.com" + product_url[0], verify=True, headers=headers)
@@ -192,6 +192,50 @@ def download_video(video, directory, assets, session, headers):
192192
if not os.listdir(video_directory):
193193
os.rmdir(video_directory)
194194

195+
# download course
196+
def download_course(course, directory, assets, session, headers):
    """Download the requested assets of one integrated course.

    A sub-directory named after the scrubbed course title is created inside
    ``directory``; it is removed again if nothing ends up in it.

    Args:
        course: Node for a single course entry. It is queried with ``xpath``
            for its title, its download links and its cover image.
        directory: Parent directory in which the course directory is created.
        assets: Requested asset names, e.g. ``'course,cover,code'``. Each of
            'course', 'code' and 'cover' is looked up with ``in``.
        session: Session object handed through to ``download_to_file``.
        headers: Request headers handed through to ``download_to_file``.
    """
    # scrub the title so it can be used as a directory and file name
    title = course.xpath("@title")[0].replace("/", "-").replace(" [course]", "").replace(":", " -").strip()

    # path to save the files
    course_directory = os.path.join(directory, title)

    # create the directory if it doesn't exist
    if not os.path.exists(course_directory):
        os.makedirs(course_directory)

    # The title sometimes contains characters the console cannot print, so it
    # is round-tripped through the console encoding with replacement.
    # sys.stdout.encoding can be None (redirected or replaced stdout), so fall
    # back to UTF-8, and decode with the SAME codec that was used to encode.
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print(title.encode(encoding, errors='replace').decode(encoding, errors='replace'))

    # get the download links (kept in their own names instead of rebinding
    # the ``course`` parameter)
    code_links = course.xpath(".//div[contains(@class,'download-container')]//a[contains(@href,'/code_download')]/@href")
    image_links = course.xpath(".//div[contains(@class,'product-thumbnail')]//img/@src")
    # NOTE(review): the course archive is located through the
    # '/video_download' link -- confirm this is the intended endpoint.
    course_links = course.xpath(".//div[contains(@class,'download-container')]//a[contains(@href,'/video_download')]/@href")

    # course
    if course_links and 'course' in assets:
        filename = os.path.join(course_directory, title + " [course].zip")
        print("Downloading COURSE")
        download_to_file(filename, course_links[0], session, headers)

    # code
    if code_links and 'code' in assets:
        filename = os.path.join(course_directory, title + " [CODE].zip")
        print("Downloading CODE")
        download_to_file(filename, code_links[0], session, headers)

    # cover image
    if image_links and 'cover' in assets:
        filename = os.path.join(course_directory, title + ".jpg")
        # the thumbnail src is protocol-relative; drop the thumbnail cache
        # path segment from the URL before downloading
        image_url = "https:" + image_links[0].replace("/imagecache/thumbview", "", 1)
        print("Downloading IMAGE")
        download_to_file(filename, image_url, session, headers, False)

    # delete directory if it's empty
    if not os.listdir(course_directory):
        os.rmdir(course_directory)
195239

196240
def main(argv):
197241
headers = {
@@ -202,11 +246,12 @@ def main(argv):
202246
root_directory = 'packtpub_media'
203247
book_assets = None # 'pdf,mobi,epub,cover,code'
204248
video_assets = None # 'video,cover,code'
205-
errorMessage = 'Usage: downloader.py -e <email> -p <password> [-d <directory> -b <book assets> -v <video assets>]'
249+
course_assets = None # 'course,cover,code'
250+
errorMessage = 'Usage: downloader.py -e <email> -p <password> [-d <directory> -b <book assets> -v <video assets> -c <course assets>]'
206251

207252
# get the command line arguments/options
208253
try:
209-
opts, args = getopt.getopt(argv,"e:p:d:b:v:",["email=","pass=","directory=","books=","videos="])
254+
opts, args = getopt.getopt(argv,"e:p:d:b:v:c:",["email=","pass=","directory=","books=","videos=","courses="])
210255
except getopt.GetoptError:
211256
print(errorMessage)
212257
sys.exit(2)
@@ -223,6 +268,9 @@ def main(argv):
223268
book_assets = arg
224269
elif opt in ('-v','--videos'):
225270
video_assets = arg
271+
elif opt in ('-c','--courses'):
272+
course_assets = arg
273+
226274

227275
# do we have the minimum required info?
228276
if not email or not password:
@@ -301,8 +349,26 @@ def main(argv):
301349
videos_directory = os.path.join(root_directory, "videos")
302350
download_video(video, videos_directory, video_assets, session, headers)
303351

352+
if course_assets:
353+
354+
# get the list of courses
355+
courses_page = session.get("https://www.packtpub.com/account/my-courses", verify=True, headers=headers)
356+
courses_tree = html.fromstring(courses_page.content)
357+
course_nodes = courses_tree.xpath("//div[@id='product-account-list']/div[contains(@class,'product-line unseen')]")
358+
359+
print('###########################################################################')
360+
print("FOUND {0} INTEGRATED COURSES: STARTING DOWNLOADS".format(len(course_nodes)))
361+
print('###########################################################################')
362+
363+
# loop through the courses
364+
for course in course_nodes:
365+
366+
# download the course
367+
courses_directory = os.path.join(root_directory, "courses")
368+
download_course(course, courses_directory, course_assets, session, headers)
369+
304370

305371
if __name__ == "__main__":
306-
reload(sys)
372+
reload(sys)
307373
sys.setdefaultencoding('utf8')
308374
main(sys.argv[1:])

0 commit comments

Comments
 (0)