Skip to content

Commit

Permalink
Add played videos download feature
Browse files: browse the repository at this point in the history
  • Loading branch information
sina-rostami committed Mar 9, 2021
1 parent b434c9b commit 46ac83c
Showing 1 changed file with 33 additions and 26 deletions.
59 changes: 33 additions & 26 deletions downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,34 +114,41 @@ def download_other_files(self):
output_directory = './output/'+self.name_to_save+'/'
if not os.path.exists(output_directory):
os.makedirs(output_directory)
print('Downloading pdfs...')
print('Downloading other files...')
index_stream_xml = ET.parse(
'./temp/' + self.name_to_save + '/indexstream.xml')
pdfs = index_stream_xml.findall(
'Message/Array/Object/newValue/documentDescriptor/downloadUrl')
for pdf in list(pdfs):
try:
pdf_name = re.split('/', pdf.text)[6][6:]
pdf_url = self.base_download_url + \
re.split('/', pdf.text)[4] + '/source/' + \
pdf_name + '?download=true'
pdf_name = requests.utils.unquote(pdf_name)
path_to_save = './output/' + self.name_to_save + \
'/' + pdf_name
if os.path.isfile(path_to_save):
continue
print('Downloading ' + pdf_name)
with self.dl_session.get(pdf_url, headers=self.download_headers, stream=True) as req:
with open(path_to_save, 'wb') as pdf_file:
t = tqdm(unit_scale=True, desc=pdf_name,
unit='B', total=int(req.headers['content-length']))
for data in req.iter_content(2048):
pdf_file.write(data)
t.update(2048)
t.close()
except:
continue
print('Pdfs Downloaded!')
for arr in index_stream_xml.findall('Message'):
if int(arr.get('time')) > 0:
files = arr.findall(
'Array/Object/newValue/documentDescriptor')
for file in list(files):
try:
if file.find('downloadUrl').text != None:
file_name = re.split('/', file.find('downloadUrl').text)[6][6:]
file_url = self.base_download_url + \
re.split('/', file.find('downloadUrl').text)[4] + '/source/' + \
file_name + '?download=true'
elif file.find('registerContentUrl').text != None:
file_name = file.find('theName').text
prefix = re.split('/', file.find('registerContentUrl').text)[1]
file_url = 'http://connect.kntu.ac.ir/' + prefix + '/output/' + file_name
file_name = requests.utils.unquote(file_name)
path_to_save = './output/' + self.name_to_save + \
'/' + file_name
if os.path.isfile(path_to_save):
continue
print('Downloading ' + file_name)
with self.dl_session.get(file_url, headers=self.download_headers, stream=True) as req:
with open(path_to_save, 'wb') as file_file:
t = tqdm(unit_scale=True, desc=file_name,
unit='B', total=int(req.headers['content-length']))
for data in req.iter_content(8192):
file_file.write(data)
t.update(8192)
t.close()
except :
continue
print('other files Downloaded!')

def remove_temp_directory(self):
if os.path.isdir('./temp'):
Expand Down

0 comments on commit 46ac83c

Please sign in to comment.