Skip to content

Commit e167913

Browse files
authored
Merge pull request #122 from kevinmcmurtrie/patch-1
Fix VP8 low bitrate
2 parents a7fe235 + a7307d1 commit e167913

File tree

8 files changed

+186
-27
lines changed

8 files changed

+186
-27
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1212
- Using openZIM Python bootstrap conventions (including hatch-openzim plugin) #120
1313
- Support for Python 3.12, drop Python 3.7 #118
1414
- Replace "iso-369" by "iso639-lang" library
15+
- Rework the VideoWebmLow preset for faster encoding and smaller file size (preset has been bumped to version 2)
16+
- When reencoding a video, ffmpeg now uses only 1 CPU thread by default (a new `threads` argument to `reencode` allows overriding this default value)
1517

1618
## [3.2.0] - 2023-12-16
1719

contrib/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2+
This folder contains some tooling around zimscraperlib:
3+
- `encode_video.py`: a small utility to encode a video with an existing video preset, just like a scraper would do

contrib/encode_video.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import sys
2+
from pathlib import Path
3+
from typing import List
4+
5+
from zimscraperlib import logger
6+
from zimscraperlib.video import presets, reencode
7+
8+
9+
def encode_video(src_path: Path, dst_path: Path, preset: str):
    """Re-encode a video with one of the presets from `zimscraperlib.video.presets`

    Arguments -
        src_path - Path to the video to encode (must exist)
        dst_path - Path where the re-encoded video is stored
        preset - Name of the preset class to look up in the `presets` module

    Returns True when `reencode` reported success, False otherwise (the process
    output is logged as an error in that case).

    Raises ValueError when `src_path` does not exist, and re-raises the
    AttributeError from the lookup when `preset` is not found in `presets`.
    """
    if not src_path.exists():
        raise ValueError(f"{src_path} does not exist")
    try:
        preset_cls = getattr(presets, preset)
    except AttributeError:
        logger.error(f"{preset} preset not found")
        raise
    logger.info(f"Encoding video {src_path} with {preset} version {preset_cls.VERSION}")
    success, process = reencode(
        src_path=src_path,
        dst_path=dst_path,
        ffmpeg_args=preset_cls().to_ffmpeg_args(),
        with_process=True,
    )  # pyright: ignore[reportGeneralTypeIssues] (returned type is variable, depending on `with_process` value)
    if not success:
        logger.error(f"conversion failed:\n{process.stdout}")
    # Report the outcome so that callers can react to a failed conversion
    # instead of having to parse the logs.
    return bool(success)
26+
27+
28+
def run(args: List[str] = sys.argv):
    """Command-line entry point: encode one video with a named preset

    Arguments -
        args - Command-line style list: program name followed by
               <src_path>, <dst_path> and <preset> (defaults to `sys.argv`)

    Returns the exit code: 1 when arguments are missing or when `encode_video`
    explicitly reports a failure, 0 otherwise.
    """
    if len(args) < 4:  # noqa: PLR2004
        # `args` may be an empty list when called programmatically
        prog = args[0] if args else "encode_video.py"
        print(f"Usage: {prog} <src_path> <dst_path> <preset>")  # noqa: T201
        # one argument description per line
        print(  # noqa: T201
            "\t<src_path>\tpath to the video to encode.\n"
            "\t<dst_path>\tpath to store the reencoded video.\n"
            "\t<preset>\tname of preset to use."
        )
        return 1
    outcome = encode_video(Path(args[1]), Path(args[2]), args[3])
    # Only an explicit False is treated as a failure; a None return value
    # (no status reported) keeps the historical exit code of 0.
    return 1 if outcome is False else 0
39+
40+
41+
# Script entry point: propagate the value returned by run() as exit status.
if __name__ == "__main__":
    raise SystemExit(run())

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ target-version = ['py38']
113113
[tool.ruff]
114114
target-version = "py38"
115115
line-length = 88
116-
src = ["src"]
116+
src = ["src", "contrib"]
117117

118118
[tool.ruff.lint]
119119
select = [
@@ -235,7 +235,7 @@ exclude_lines = [
235235
]
236236

237237
[tool.pyright]
238-
include = ["src", "tests", "tasks.py"]
238+
include = ["contrib", "src", "tests", "tasks.py"]
239239
exclude = [".env/**", ".venv/**"]
240240
extraPaths = ["src"]
241241
pythonVersion = "3.8"

src/zimscraperlib/video/encoding.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,41 +6,64 @@
66
import shutil
77
import subprocess
88
import tempfile
9+
from typing import List, Optional
910

1011
from zimscraperlib import logger
1112
from zimscraperlib.logging import nicer_args_join
1213

1314

15+
def _build_ffmpeg_args(
16+
src_path: pathlib.Path,
17+
tmp_path: pathlib.Path,
18+
ffmpeg_args: List[str],
19+
threads: Optional[int],
20+
) -> List[str]:
21+
if threads:
22+
if "-threads" in ffmpeg_args:
23+
raise AttributeError("Cannot set the number of threads, already set")
24+
else:
25+
ffmpeg_args += ["-threads", str(threads)]
26+
args = [
27+
"/usr/bin/env",
28+
"ffmpeg",
29+
"-y",
30+
"-i",
31+
f"file:{src_path}",
32+
*ffmpeg_args,
33+
f"file:{tmp_path}",
34+
]
35+
return args
36+
37+
1438
def reencode(
1539
src_path,
1640
dst_path,
1741
ffmpeg_args,
1842
delete_src=False, # noqa: FBT002
1943
with_process=False, # noqa: FBT002
2044
failsafe=True, # noqa: FBT002
45+
threads: Optional[int] = 1,
2146
):
2247
"""Runs ffmpeg with given ffmpeg_args
2348
2449
Arguments -
2550
src_path - Path to source file
2651
dst_path - Path to destination file
2752
ffmpeg_args - A list of ffmpeg arguments
53+
threads - Number of encoding threads used by ffmpeg
2854
delete_src - Delete source file after convertion
2955
with_process - Optionally return the output from ffmpeg (stderr and stdout)
3056
failsafe - Run in failsafe mode
3157
"""
3258

3359
with tempfile.TemporaryDirectory() as tmp_dir:
3460
tmp_path = pathlib.Path(tmp_dir).joinpath(f"video.tmp{dst_path.suffix}")
35-
args = [
36-
"/usr/bin/env",
37-
"ffmpeg",
38-
"-y",
39-
"-i",
40-
f"file:{src_path}",
41-
*ffmpeg_args,
42-
f"file:{tmp_path}",
43-
]
61+
args = _build_ffmpeg_args(
62+
src_path=src_path,
63+
tmp_path=tmp_path,
64+
ffmpeg_args=ffmpeg_args,
65+
threads=threads,
66+
)
4467
logger.debug(
4568
f"Encode {src_path} -> {dst_path} video format = {dst_path.suffix}"
4669
)

src/zimscraperlib/video/presets.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,23 +32,20 @@ class VideoWebmLow(Config):
3232
"""Low Quality webm video
3333
3434
480:h format with height adjusted to keep aspect ratio
35-
300k video bitrate
36-
48k audio bitrate
37-
highly degraded quality (30, 42)"""
35+
128k target video bitrate but stay within quality boundaries.
36+
48k audio bitrate"""
3837

39-
VERSION = 1
38+
VERSION = 2
4039

4140
ext = "webm"
4241
mimetype = f"{preset_type}/webm"
4342

4443
options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = {
4544
"-codec:v": "libvpx", # video codec
4645
"-quality": "best", # codec preset
47-
"-b:v": "300k", # target video bitrate
48-
"-maxrate": "300k", # max video bitrate
49-
"-minrate": "300k", # min video bitrate
50-
"-qmin": "30", # min quantizer scale
51-
"-qmax": "42", # max quantizer scale
46+
"-b:v": "128k", # Adjust quantizer within min/max to target this bitrate
47+
"-qmin": "18", # Reduce the bitrate on very still videos
48+
"-qmax": "40", # Increase the bitrate on very busy videos
5249
"-vf": "scale='480:trunc(ow/a/2)*2'", # frame size
5350
"-codec:a": "libvorbis", # audio codec
5451
"-ar": "44100", # audio sampling rate

tests/video/test_encoding.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import re
2+
from pathlib import Path
3+
from typing import List, Optional
4+
5+
import pytest
6+
7+
from zimscraperlib.video.encoding import _build_ffmpeg_args
8+
9+
10+
@pytest.mark.parametrize(
    "src_path,tmp_path,ffmpeg_args,threads,expected",
    [
        # no thread count requested: arguments are passed through as-is
        (
            Path("path1/file1.mp4"),
            Path("path1/fileout.mp4"),
            ["-codec:v", "libx265"],
            None,
            [
                "/usr/bin/env",
                "ffmpeg",
                "-y",
                "-i",
                "file:path1/file1.mp4",
                "-codec:v",
                "libx265",
                "file:path1/fileout.mp4",
            ],
        ),
        # thread count requested: `-threads 1` is added before the output file
        (
            Path("path2/file2.mp4"),
            Path("path12/tmpfile.mp4"),
            ["-b:v", "300k"],
            1,
            [
                "/usr/bin/env",
                "ffmpeg",
                "-y",
                "-i",
                "file:path2/file2.mp4",
                "-b:v",
                "300k",
                "-threads",
                "1",
                "file:path12/tmpfile.mp4",
            ],
        ),
        # `-threads` already present and requested again: an error is expected
        # (signalled by `expected` being None)
        (
            Path("path2/file2.mp4"),
            Path("path12/tmpfile.mp4"),
            ["-b:v", "300k", "-threads", "1"],
            1,
            None,
        ),
    ],
)
def test_build_ffmpeg_args(
    src_path: Path,
    tmp_path: Path,
    ffmpeg_args: List[str],
    threads: Optional[int],
    expected: Optional[List[str]],
):
    """Check the command built by `_build_ffmpeg_args`, or the error it raises"""
    call_kwargs = {
        "src_path": src_path,
        "tmp_path": tmp_path,
        "ffmpeg_args": ffmpeg_args,
        "threads": threads,
    }
    if not expected:
        # failure case: the helper must refuse to set the thread count twice
        with pytest.raises(
            AttributeError,
            match=re.escape("Cannot set the number of threads, already set"),
        ):
            _build_ffmpeg_args(**call_kwargs)
        return
    assert _build_ffmpeg_args(**call_kwargs) == expected

tests/video/test_video.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -141,20 +141,18 @@ def test_preset_has_mime_and_ext():
141141

142142
def test_preset_video_webm_low():
143143
config = VideoWebmLow()
144-
assert config.VERSION == 1
144+
assert config.VERSION == 2
145145
args = config.to_ffmpeg_args()
146-
assert len(args) == 24
146+
assert len(args) == 20
147147
options_map = [
148148
("codec:v", "libvpx"),
149149
("codec:a", "libvorbis"),
150-
("maxrate", "300k"),
151-
("minrate", "300k"),
152-
("b:v", "300k"),
150+
("b:v", "128k"),
153151
("ar", "44100"),
154152
("b:a", "48k"),
155153
("quality", "best"),
156-
("qmin", "30"),
157-
("qmax", "42"),
154+
("qmin", "18"),
155+
("qmax", "40"),
158156
("vf", "scale='480:trunc(ow/a/2)*2'"),
159157
]
160158
for option, val in options_map:

0 commit comments

Comments
 (0)