Skip to content

Commit

Permalink
Merge pull request #111 from openzim/handle_descriptions
Browse files Browse the repository at this point in the history
Add utility function to compute/check ZIM descriptions
  • Loading branch information
rgaudin authored Aug 22, 2023
2 parents 4f8c3cc + 591c6dc commit cb01a0f
Show file tree
Hide file tree
Showing 6 changed files with 242 additions and 6 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ All notable changes to this project are documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.5.0).

## [3.1.1]
## [Unreleased]

### Added
- Add utility function to compute/check ZIM descriptions #110

## [3.1.1] - 2023-07-18

### Changed

Expand Down
1 change: 1 addition & 0 deletions src/zimscraperlib/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
),
}

# validate_title rejects a Title whose len() is greater than this value
RECOMMENDED_MAX_TITLE_LENGTH = 30
# upper bounds, as measured by len(), enforced on the description and on the
# long description respectively
MAXIMUM_DESCRIPTION_METADATA_LENGTH = 80
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH = 4000

Expand Down
59 changes: 57 additions & 2 deletions src/zimscraperlib/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,18 @@
import pathlib
import shutil
import tempfile
from typing import Optional, Union
from typing import Optional, Tuple, Union

from . import logger
from .constants import MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH
from .constants import MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH
from .download import stream_file


def handle_user_provided_file(
source: Optional[Union[pathlib.Path, str]] = None,
dest: Optional[pathlib.Path] = None,
in_dir: pathlib.Path = None,
in_dir: Optional[pathlib.Path] = None,
nocopy: bool = False,
) -> Union[pathlib.Path, None]:
"""path to downloaded or copied a user provided file (URL or path)
Expand Down Expand Up @@ -49,3 +51,56 @@ def handle_user_provided_file(
shutil.copy(source, dest)

return dest


def compute_descriptions(
    default_description: str,
    user_description: Optional[str],
    user_long_description: Optional[str],
) -> Tuple[str, Optional[str]]:
    """Build the (description, long_description) pair to use for a ZIM.

    Lengths are bounded by MAX_DESC_LENGTH for the description and by
    MAX_LONG_DESC_LENGTH for the long description.

    A non-empty user value is never modified: it is returned untouched when it fits
    within its limit and rejected with an error when it does not.

    A missing (None or empty) user value is handled as follows:
    - description: the default description, cut down to MAX_DESC_LENGTH; when
      something had to be cut, the last kept character is replaced by "…".
    - long description: derived only when the default description is too long to
      be a description; it is then the default description, cut down to
      MAX_LONG_DESC_LENGTH in the same manner. In every other case the user value
      is handed back unchanged (None when it was not set).

    args:
        default_description: text to fall back on when a user value is missing
                             (typically fetched online)
        user_description: description chosen by the user (typically set by a
                          CLI argument)
        user_long_description: long description chosen by the user (typically set
                               by a CLI argument)

    Returns a tuple of (description, long_description)

    Raises ValueError when a user value is longer than its maximum length.
    """

    def _fit_default(limit: int) -> str:
        # default description reduced to `limit` characters; the ellipsis takes
        # the place of the last kept character so the result never exceeds `limit`
        head = default_description[:limit]
        if len(default_description) > limit:
            return head[:-1] + "…"
        return head

    # user values are validated first, description before long description
    if user_description:
        if len(user_description) > MAX_DESC_LENGTH:
            raise ValueError(
                f"Description too long ({len(user_description)}>{MAX_DESC_LENGTH})"
            )
    if user_long_description:
        if len(user_long_description) > MAX_LONG_DESC_LENGTH:
            raise ValueError(
                f"LongDescription too long ({len(user_long_description)}"
                f">{MAX_LONG_DESC_LENGTH})"
            )

    # a long description is only made up when the default text cannot be fully
    # carried by the (short) description
    long_description = user_long_description
    if not long_description and len(default_description) > MAX_DESC_LENGTH:
        long_description = _fit_default(MAX_LONG_DESC_LENGTH)

    description = user_description if user_description else _fit_default(MAX_DESC_LENGTH)

    return description, long_description
3 changes: 2 additions & 1 deletion src/zimscraperlib/zim/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
MANDATORY_ZIM_METADATA_KEYS,
MAXIMUM_DESCRIPTION_METADATA_LENGTH,
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH,
RECOMMENDED_MAX_TITLE_LENGTH,
)
from ..i18n import is_valid_iso_639_3
from ..image.probing import is_valid_image
Expand Down Expand Up @@ -40,7 +41,7 @@ def validate_standard_str_types(name: str, value: str):

def validate_title(name: str, value: str):
"""ensures Title metadata is within recommended length

Only acts when name is "Title": raises ValueError when value is longer than
the allowed length; any other name is ignored."""
# NOTE(review): the two conditions below look like the before/after lines of a
# one-line change (literal 30 replaced by RECOMMENDED_MAX_TITLE_LENGTH) shown
# without diff markers; presumably only the second one exists in the actual
# file. Confirm against the repository.
if name == "Title" and len(value) > 30:
if name == "Title" and len(value) > RECOMMENDED_MAX_TITLE_LENGTH:
raise ValueError(f"{name} is too long.")


Expand Down
175 changes: 174 additions & 1 deletion tests/inputs/test_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,17 @@
# vim: ai ts=4 sts=4 et sw=4 nu

import pathlib
from typing import Optional

import pytest

from zimscraperlib.inputs import handle_user_provided_file
from zimscraperlib.constants import (
MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
)
from zimscraperlib.constants import (
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
)
from zimscraperlib.inputs import compute_descriptions, handle_user_provided_file


def test_with_none():
Expand Down Expand Up @@ -72,3 +79,169 @@ def test_remote_indir(tmp_path, valid_http_url):
assert fpath is not None
assert fpath.exists()
assert fpath.parent == tmp_path


# default description passed in the cases where a user description is supplied,
# so that the default text is not expected to show up in the result
TEXT_NOT_USED = "text not used"

LONG_TEXT = (
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor "
"incididunt ut labore et dolore magna aliqua. At erat pellentesque adipiscing "
"commodo elit at imperdiet. Rutrum tellus pellentesque eu tincidunt tortor aliquam"
" nulla facilisi. Eget lorem dolor sed viverra ipsum nunc. Ipsum nunc aliquet "
"bibendum enim facilisis gravida neque convallis. Aliquam malesuada bibendum arcu "
"vitae elementum curabitur. Platea dictumst quisque sagittis purus sit amet "
"volutpat. Blandit libero volutpat sed cras ornare. In eu mi bibendum neque "
"egestas. Egestas dui id ornare arcu odio. Pulvinar neque laoreet suspendisse "
"interdum. Fames ac turpis egestas integer eget aliquet nibh praesent tristique. Et"
" egestas quis ipsum suspendisse ultrices gravida dictum fusce. Malesuada fames ac "
"turpis egestas. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada "
"proin libero. In arcu cursus euismod quis viverra. Faucibus in ornare quam viverra"
". Curabitur vitae nunc sed velit dignissim sodales ut eu sem. Velit scelerisque in"
" dictum non consectetur a erat nam. Proin fermentum leo vel orci porta non. Fames"
" ac turpis egestas sed tempus. Vitae justo eget magna fermentum iaculis eu non. "
"Imperdiet massa tincidunt nunc pulvinar sapien et ligula. Laoreet sit amet cursus "
"sit amet dictum sit amet. Quis hendrerit dolor magna eget. Orci ac auctor augue "
"mauris augue. Consequat interdum varius sit amet mattis. At ultrices mi tempus "
"imperdiet nulla malesuada pellentesque elit. Volutpat est velit egestas dui. "
"Potenti nullam ac tortor vitae. At tempor commodo ullamcorper a lacus vestibulum "
"sed arcu non. Duis ut diam quam nulla. Vestibulum mattis ullamcorper velit sed "
"ullamcorper. Sit amet commodo nulla facilisi nullam vehicula. Faucibus purus in "
"massa tempor nec feugiat. Sem fringilla ut morbi tincidunt augue interdum velit. "
"Etiam dignissim diam quis enim lobortis scelerisque fermentum dui. Nunc vel risus "
"commodo viverra maecenas accumsan. Aenean sed adipiscing diam donec adipiscing "
"tristique. Maecenas accumsan lacus vel facilisis volutpat est velit egestas. Nulla"
" aliquet porttitor lacus luctus accumsan tortor posuere ac. Habitant morbi "
"tristique senectus et netus et. Eget mi proin sed libero enim sed faucibus turpis "
"in. Vulputate enim nulla aliquet porttitor lacus. Dui ut ornare lectus sit amet "
"est. Quam lacus suspendisse faucibus interdum posuere. Sagittis orci a scelerisque"
" purus semper eget duis at tellus. Tellus molestie nunc non blandit massa. Feugiat"
" vivamus at augue eget arcu dictum varius duis at. Varius morbi enim nunc faucibus"
" a pellentesque sit. Id aliquet lectus proin nibh nisl condimentum id venenatis a."
" Tortor dignissim convallis aenean et tortor at risus viverra adipiscing. Aliquam "
"malesuada bibendum arcu vitae elementum curabitur vitae nunc sed. Habitasse platea"
" dictumst quisque sagittis purus sit amet volutpat. Vitae auctor eu augue ut "
"lectus. At varius vel pharetra vel turpis nunc eget. Dictum at tempor commodo "
"ullamcorper a lacus vestibulum sed arcu. Pellentesque massa placerat duis "
"ultricies. Enim nunc faucibus a pellentesque sit amet porttitor eget dolor. "
"Volutpat blandit aliquam etiam erat velit scelerisque in. Amet mattis vulputate "
"enim nulla aliquet porttitor. Egestas maecenas pharetra convallis posuere morbi "
"leo urna molestie. Duis ut diam quam nulla porttitor massa id. In fermentum "
"posuere urna nec tincidunt praesent. Turpis egestas sed tempus urna et pharetra "
"pharetra massa. Tellus molestie nunc non blandit massa. Diam phasellus vestibulum "
"lorem sed risus ultricies. Egestas erat imperdiet sed euismod nisi porta lorem. "
"Quam viverra orci sagittis eu volutpat odio facilisis mauris sit. Ornare aenean "
"euismod elementum nisi quis. Laoreet non curabitur gravida arcu ac tortor "
"dignissim convallis aenean. Sagittis aliquam malesuada bibendum arcu vitae "
"elementum. Sed blandit libero volutpat sed cras ornare. Sagittis eu volutpat odio "
"facilisis mauris. Facilisis volutpat est velit egestas dui id ornare arcu odio. "
"Eu feugiat pretium nibh."
)


@pytest.mark.parametrize(
"user_description, user_long_description, default_description, raises, "
"expected_description, expected_long_description",
[
# user description set and is short, user long description not set, default
# description does not matter
(
LONG_TEXT[0:MAX_DESC_LENGTH],
None,
TEXT_NOT_USED,
False,
LONG_TEXT[0:MAX_DESC_LENGTH],
None,
),
# user description set and is too long, default description does not matter
(LONG_TEXT[0 : MAX_DESC_LENGTH + 1], None, TEXT_NOT_USED, True, None, None),
# user description not set and default description is short enough
(
None,
None,
LONG_TEXT[0:MAX_DESC_LENGTH],
False,
LONG_TEXT[0:MAX_DESC_LENGTH],
None,
),
# user description not set and default description is too long for description
# but ok for long description
(
None,
None,
LONG_TEXT[0 : MAX_DESC_LENGTH + 1],
False,
LONG_TEXT[0 : MAX_DESC_LENGTH - 1] + "…",
LONG_TEXT[0 : MAX_DESC_LENGTH + 1],
),
(
None,
None,
LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
False,
LONG_TEXT[0 : MAX_DESC_LENGTH - 1] + "…",
LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
),
# user description not set and default description is too long for description
# and long description
(
None,
None,
LONG_TEXT[0 : MAX_LONG_DESC_LENGTH + 1],
False,
LONG_TEXT[0 : MAX_DESC_LENGTH - 1] + "…",
LONG_TEXT[0 : MAX_LONG_DESC_LENGTH - 1] + "…",
),
# user description set and is short, user long description set and is short,
# default description does not matter
(
LONG_TEXT[0:MAX_DESC_LENGTH],
LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
TEXT_NOT_USED,
False,
LONG_TEXT[0:MAX_DESC_LENGTH],
LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
),
# user description set and is short, user long description set and is too long,
# default description does not matter
(
LONG_TEXT[0:MAX_DESC_LENGTH],
LONG_TEXT[0 : MAX_LONG_DESC_LENGTH + 1],
TEXT_NOT_USED,
True,
None,
None,
),
# user description not set, user long description set and is short,
# default description set to something different than long desc
(
None,
LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
LONG_TEXT[10:MAX_LONG_DESC_LENGTH],
False,
LONG_TEXT[10 : MAX_DESC_LENGTH + 9] + "…",
LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
),
],
)
def test_description(
    user_description: Optional[str],
    user_long_description: Optional[str],
    default_description: str,
    *,
    raises: bool,
    expected_description: Optional[str],
    expected_long_description: Optional[str],
):
    """Run compute_descriptions on one parametrized case and check the outcome.

    args:
        user_description: user description fed to compute_descriptions (may be None)
        user_long_description: user long description fed to compute_descriptions
                               (may be None)
        default_description: default description fed to compute_descriptions
        raises: whether the call is expected to raise ValueError
        expected_description: expected description; None (unused) when raises
        expected_long_description: expected long description; None when none is
                                   expected, or (unused) when raises
    """
    if raises:
        with pytest.raises(ValueError):
            compute_descriptions(
                default_description, user_description, user_long_description
            )
        # nothing is returned in the failing cases, so there is nothing to compare
        return

    description, long_description = compute_descriptions(
        default_description, user_description, user_long_description
    )

    assert description == expected_description
    assert long_description == expected_long_description
3 changes: 2 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@ deps =
pytest-cov
-r{toxinidir}/requirements.txt
commands =
pytest --runslow --runinstalled --cov=zimscraperlib --cov-report=term --cov-report term-missing
pytest --runslow --runinstalled --cov=zimscraperlib --cov-report=term --cov-report term-missing {posargs}

[testenv:black]
deps =
black>=23.1.0,<24
commands = black --check .

[testenv:isort]
base_python=py38
deps =
isort>=5.12.0,<5.13
commands = isort --profile black --check src tests
Expand Down

0 comments on commit cb01a0f

Please sign in to comment.