Skip to content

Commit 5dfc065

Browse files
authored
Merge pull request #278 from openzim/enhance_large_dl
Enhance large DL support for retries and custom UA
2 parents 9904777 + 82c2201 commit 5dfc065

File tree

3 files changed: 49 additions and 14 deletions.

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- Add support for custom number of retries and user-agent in save_large_file (#278)
+
 ### Fixed
 
 - Add proper typing @overload to `zimscraperlib.image.optimize_xxx` methods (#273)

src/zimscraperlib/download.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -121,21 +121,30 @@ class BestMp4(YoutubeConfig):
     }
 
 
-def save_large_file(url: str, fpath: pathlib.Path) -> None:
-    """download a binary file from its URL, using wget"""
+def save_large_file(
+    url: str, fpath: pathlib.Path, retries: int = 5, user_agent: str | None = None
+) -> None:
+    """download a binary file from its URL, using wget
+
+    Arguments -
+        url:
+    """
+    command = [
+        "/usr/bin/env",
+        "wget",
+        "-t",
+        f"{retries}",
+        "--retry-connrefused",
+        "--random-wait",
+        "-O",
+        str(fpath),
+        "-c",
+        url,
+    ]
+    if user_agent:
+        command += ["-U", user_agent]
     subprocess.run(
-        [
-            "/usr/bin/env",
-            "wget",
-            "-t",
-            "5",
-            "--retry-connrefused",
-            "--random-wait",
-            "-O",
-            str(fpath),
-            "-c",
-            url,
-        ],
+        command,
         check=True,
     )
 
tests/download/test_download.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,28 @@ def test_large_download_https(tmp_path: pathlib.Path, valid_https_url: str):
     assert_downloaded_file(valid_https_url, dest_file)
 
 
+@pytest.mark.slow
+def test_large_download_https_custom_retry(
+    tmp_path: pathlib.Path, valid_https_url: str
+):
+    dest_file = tmp_path / "favicon.ico"
+    save_large_file(valid_https_url, dest_file, 1)
+    assert_downloaded_file(valid_https_url, dest_file)
+
+
+@pytest.mark.slow
+def test_large_download_https_custom_ua(tmp_path: pathlib.Path, valid_https_url: str):
+    dest_file = tmp_path / "favicon.ico"
+    save_large_file(
+        valid_https_url,
+        dest_file,
+        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/120.0.0.0 Safari/537.36",
+    )
+    assert_downloaded_file(valid_https_url, dest_file)
+
+
 @pytest.mark.slow
 @pytest.mark.parametrize(
     "url,video_id",

0 commit comments

Comments
 (0)