forked from intel/AI-Playground
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfile_downloader.py
131 lines (116 loc) · 4.19 KB
/
file_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
import threading
import time
import traceback
from io import BufferedWriter
from threading import Thread
from typing import Callable

import requests

from exceptions import DownloadException
class FileDownloader:
    """Download a single URL to a local file with resume, retry and progress callbacks.

    Usage: optionally assign ``on_download_progress`` and/or
    ``on_download_completed``, then call :meth:`download_file`. The call blocks
    until the transfer finishes, fails, or is cancelled through
    :meth:`stop_download` (which must therefore be called from another thread).

    If the target file already exists and is non-empty, the download asks the
    server for the remaining bytes only (HTTP ``Range``) and appends them. In
    that case ``total_size`` and ``download_size`` describe the resumed part,
    not the whole file.
    """

    # Size in bytes of each chunk read from the response stream.
    CHUNK_SIZE = 4096
    # How many times a failed transfer is re-attempted before giving up.
    MAX_RETRIES = 3
    # (connect, read) timeouts in seconds, so a stalled connection raises and
    # goes through the retry path instead of blocking forever.
    REQUEST_TIMEOUT = (10, 60)

    # Called roughly once per second while downloading, with
    # (basename, download_size, total_size, bytes_since_previous_call).
    on_download_progress: Callable[[str, int, int, int], None] = None
    # Called once when download_file() ends, with (basename, error); error is
    # None when the transfer finished or was stopped without an exception.
    on_download_completed: Callable[[str, Exception], None] = None
    url: str
    filename: str
    basename: str
    total_size: int
    download_size: int
    download_stop: bool
    prev_sec_download_size: int

    def __init__(self):
        self.download_stop = False
        self.download_size = 0
        self.completed = False
        self.total_size = 0
        self.prev_sec_download_size = 0
        self.report_thread = None

    @staticmethod
    def _parse_total_size(headers) -> int:
        """Return the ``Content-Length`` value from *headers*, or 0 if unusable.

        Args:
            headers: Any mapping with a ``get`` method (e.g. response headers).

        Returns:
            The announced body length in bytes; 0 when the header is missing,
            not an integer, or negative (i.e. the size is unknown).
        """
        try:
            return max(int(headers.get("Content-Length")), 0)
        except (TypeError, ValueError):
            return 0

    def download_file(self, url: str, file_path: str):
        """Download *url* into *file_path*, blocking until done.

        Exceptions are not propagated: any ``Exception`` raised while
        downloading is captured and handed to ``on_download_completed``.

        Args:
            url: Address to fetch.
            file_path: Destination path; a non-empty existing file is resumed.
        """
        self.url = url
        self.filename = file_path
        self.basename = os.path.basename(file_path)
        self.download_stop = False
        self.completed = False
        self.download_size = 0
        self.prev_sec_download_size = 0
        self.total_size = 0
        self.report_thread = None

        error = None
        report_thread = None
        response = None
        fw = None
        try:
            response, fw = self.__init_download(self.url, self.filename)
            # A missing Content-Length (e.g. chunked transfer) means "size
            # unknown" (0) rather than a failed download.
            self.total_size = self._parse_total_size(response.headers)
            if self.on_download_progress is not None:
                report_thread = self.__start_report_download_progress()
                self.report_thread = report_thread
            self.__start_download(response, fw)
        except Exception as e:
            error = e
        finally:
            # Setting the flag first lets the reporter loop terminate.
            self.completed = True
            # __start_download normally closes both; closing again is a no-op
            # and covers failures that happen before it takes ownership.
            if fw is not None:
                fw.close()
            if response is not None:
                response.close()
            if report_thread is not None:
                report_thread.join()

        if self.on_download_completed is not None:
            self.on_download_completed(self.basename, error)

    def __init_download(
        self, url: str, file_path: str
    ) -> "tuple[requests.Response, BufferedWriter]":
        """Open the HTTP stream and the destination file.

        Returns:
            ``(response, file)``. The file is opened in append mode only when
            the server honoured the ``Range`` request (status 206); otherwise
            it is opened in write mode and starts from byte 0.

        Raises:
            requests.HTTPError: For 4xx/5xx responses, including 416 when the
                local file is already at least as large as the remote one.
            requests.RequestException: For connection problems and timeouts.
            OSError: If the destination cannot be created or opened.
        """
        if os.path.exists(file_path):
            start_pos = os.path.getsize(file_path)
        else:
            parent_dir = os.path.dirname(file_path)
            # dirname is "" for a bare filename, and os.makedirs("") raises.
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            start_pos = 0

        # Only ask for the missing tail when part of the file already exists.
        headers = {"Range": f"bytes={start_pos}-"} if start_pos > 0 else None

        # NOTE(review): verify=False disables TLS certificate verification, so
        # the payload can be tampered with in transit. Kept as-is because the
        # reason is not visible here -- confirm whether it is still required.
        response = requests.get(
            url,
            stream=True,
            verify=False,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        try:
            # Never write an HTTP error page into the target file.
            response.raise_for_status()
            # A 200 reply to a Range request means the server sent the whole
            # file again; appending it would corrupt the partial download.
            resumed = start_pos > 0 and response.status_code == 206
            fw = open(file_path, "ab" if resumed else "wb")
        except Exception:
            response.close()
            raise
        return response, fw

    def __start_download(self, response: "requests.Response", fw: "BufferedWriter"):
        """Stream *response* into *fw*, retrying after failures.

        Both objects are closed when the transfer ends. After a failure the
        connection and file are reopened (resuming from the bytes already on
        disk) up to ``MAX_RETRIES`` times.

        Raises:
            DownloadException: When the transfer still fails after all retries.
        """
        retry = 0
        while True:
            try:
                with response, fw:
                    for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                        fw.write(chunk)
                        self.download_size += len(chunk)
                        if self.download_stop:
                            print(
                                f"FileDownloader thread {threading.get_native_id()} exit by stop"
                            )
                            break
                break
            except Exception as err:
                traceback.print_exc()
                retry += 1
                if retry > self.MAX_RETRIES:
                    raise DownloadException(self.url) from err
                print(
                    f"FileDownloader thread {threading.get_native_id()} retry {retry} times"
                )
                time.sleep(1)
                response, fw = self.__init_download(self.url, self.filename)
                if response.status_code != 206:
                    # The file is being rewritten from byte 0, so the progress
                    # counters have to restart as well.
                    self.download_size = 0
                    self.prev_sec_download_size = 0
                    self.total_size = self._parse_total_size(response.headers)

    def __start_report_download_progress(self):
        """Start and return the thread that runs the progress reporter."""
        report_thread = Thread(target=self.__report_download_progress)
        report_thread.start()
        return report_thread

    def __report_download_progress(self):
        """Invoke ``on_download_progress`` once per second until stopped or completed."""
        while not self.download_stop and not self.completed:
            self.on_download_progress(
                self.basename,
                self.download_size,
                self.total_size,
                # Bytes received since the previous report.
                self.download_size - self.prev_sec_download_size,
            )
            self.prev_sec_download_size = self.download_size
            time.sleep(1)

    def stop_download(self):
        """Request cancellation; the transfer loop exits after its current chunk."""
        self.download_stop = True