@@ -54,7 +54,7 @@ def download_to_file(filepath, url, session, headers, prefix_url=True):
54
54
55
55
# creates a json file with info
56
56
def save_book_details (book , title , directory , session , headers ):
57
-
57
+
58
58
# fetch the product page
59
59
product_url = book .xpath (".//div[contains(@class,'product-thumbnail')]//a/@href" )
60
60
product_page = session .get ("https://www.packtpub.com" + product_url [0 ], verify = True , headers = headers )
@@ -192,6 +192,50 @@ def download_video(video, directory, assets, session, headers):
192
192
if not os .listdir (video_directory ):
193
193
os .rmdir (video_directory )
194
194
195
+ # download course
196
def download_course(course, directory, assets, session, headers):
    """Download the requested assets of a single course into its own folder.

    A sub-folder named after the scrubbed course title is created inside
    ``directory``; the folder is removed again if nothing ends up in it.

    Args:
        course: node describing one course entry. It is queried with
            ``xpath`` for the title and the download links (presumably an
            lxml element taken from the account page -- confirm with caller).
        directory: parent folder that receives the per-course sub-folder.
        assets: value tested with ``in`` for 'course', 'code' and 'cover'
            to decide which files are fetched.
        session: passed through unchanged to ``download_to_file``.
        headers: passed through unchanged to ``download_to_file``.
    """
    # scrub the title so it can be used as a folder and file name
    title = (
        course.xpath("@title")[0]
        .replace("/", "-")
        .replace(" [course]", "")
        .replace(":", " -")
        .strip()
    )

    # path to save the files, created on demand
    course_directory = os.path.join(directory, title)
    if not os.path.exists(course_directory):
        os.makedirs(course_directory)

    # The title sometimes contains characters the console cannot print.
    # Round-trip it through the console encoding, replacing what does not fit.
    # The decode must use the same codec as the encode, and the encoding can
    # be None when stdout is not a terminal, hence the fallback.
    console_encoding = sys.stdout.encoding or 'utf-8'
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print(title.encode(console_encoding, errors='replace').decode(console_encoding))

    # get the download links (kept in their own names so the ``course``
    # argument is not overwritten)
    code_links = course.xpath(".//div[contains(@class,'download-container')]//a[contains(@href,'/code_download')]/@href")
    image_links = course.xpath(".//div[contains(@class,'product-thumbnail')]//img/@src")
    course_links = course.xpath(".//div[contains(@class,'download-container')]//a[contains(@href,'/video_download')]/@href")

    # course
    if course_links and 'course' in assets:
        filename = os.path.join(course_directory, title + " [course].zip")
        print("Downloading COURSE")
        download_to_file(filename, course_links[0], session, headers)

    # code
    if code_links and 'code' in assets:
        filename = os.path.join(course_directory, title + " [CODE].zip")
        print("Downloading CODE")
        download_to_file(filename, code_links[0], session, headers)

    # cover image: the thumbnail URL is protocol-relative, and dropping the
    # image-cache segment requests the image without the thumbnail path
    if image_links and 'cover' in assets:
        filename = os.path.join(course_directory, title + ".jpg")
        image_url = "https:" + image_links[0].replace("/imagecache/thumbview", "", 1)
        print("Downloading IMAGE")
        download_to_file(filename, image_url, session, headers, False)

    # delete the directory if nothing was downloaded into it
    if not os.listdir(course_directory):
        os.rmdir(course_directory)
195
239
196
240
def main (argv ):
197
241
headers = {
@@ -202,11 +246,12 @@ def main(argv):
202
246
root_directory = 'packtpub_media'
203
247
book_assets = None # 'pdf,mobi,epub,cover,code'
204
248
video_assets = None # 'video,cover,code'
205
- errorMessage = 'Usage: downloader.py -e <email> -p <password> [-d <directory> -b <book assets> -v <video assets>]'
249
+ course_assets = None # 'course,cover,code'
250
+ errorMessage = 'Usage: downloader.py -e <email> -p <password> [-d <directory> -b <book assets> -v <video assets> -c <course assets>]'
206
251
207
252
# get the command line arguments/options
208
253
try :
209
- opts , args = getopt .getopt (argv ,"e:p:d:b:v:" ,["email=" ,"pass=" ,"directory=" ,"books=" ,"videos=" ])
254
+ opts , args = getopt .getopt (argv ,"e:p:d:b:v:c: " ,["email=" ,"pass=" ,"directory=" ,"books=" ,"videos=" , "courses =" ])
210
255
except getopt .GetoptError :
211
256
print (errorMessage )
212
257
sys .exit (2 )
@@ -223,6 +268,9 @@ def main(argv):
223
268
book_assets = arg
224
269
elif opt in ('-v' ,'--videos' ):
225
270
video_assets = arg
271
+ elif opt in ('-c' ,'--courses' ):
272
+ course_assets = arg
273
+
226
274
227
275
# do we have the minimum required info?
228
276
if not email or not password :
@@ -301,8 +349,26 @@ def main(argv):
301
349
videos_directory = os .path .join (root_directory , "videos" )
302
350
download_video (video , videos_directory , video_assets , session , headers )
303
351
352
+ if course_assets :
353
+
354
+ # get the list of courses
355
+ courses_page = session .get ("https://www.packtpub.com/account/my-courses" , verify = True , headers = headers )
356
+ courses_tree = html .fromstring (courses_page .content )
357
+ course_nodes = courses_tree .xpath ("//div[@id='product-account-list']/div[contains(@class,'product-line unseen')]" )
358
+
359
+ print ('###########################################################################' )
360
+ print ("FOUND {0} INTEGRATED COURSES: STARTING DOWNLOADS" .format (len (course_nodes )))
361
+ print ('###########################################################################' )
362
+
363
+ # loop through the courses
364
+ for course in course_nodes :
365
+
366
+ # download the course
367
+ courses_directory = os .path .join (root_directory , "courses" )
368
+ download_course (course , courses_directory , course_assets , session , headers )
369
+
304
370
305
371
if __name__ == "__main__":
    # Python 2 only: site.py removes sys.setdefaultencoding at start-up, so
    # the module is reloaded to get it back and make UTF-8 the implicit
    # str/unicode codec. Neither name exists on Python 3, where the default
    # is already UTF-8, so the hack is skipped there instead of crashing.
    if sys.version_info[0] < 3:
        reload(sys)
        sys.setdefaultencoding('utf8')
    main(sys.argv[1:])
0 commit comments