From b6cd79fa03014c94e366cf112a6d9224d49134ec Mon Sep 17 00:00:00 2001
From: David Mandelberg
Date: Wed, 8 Nov 2023 15:36:58 -0500
Subject: [PATCH] Add functions to get labels and descriptions

---

Notes:
    What this patch adds to rock_paper_sand/wikidata.py:

    * _language_keyed_string(mapping, languages) walks `languages` in the
      order given. For each language it returns mapping[language]["value"]
      when that exact key exists; otherwise it returns the "value" of the
      first key in `mapping` that starts with "<language>-" (for example
      "en-us" when "en" was requested). A later language is consulted only
      when an earlier one matched neither way. It returns None when nothing
      matches, including when `languages` or `mapping` is empty.
    * _label(item, languages) applies that lookup to item["labels"].
    * _description(item, languages) applies it to item["descriptions"].

    wikidata_test.py covers both wrappers against the same four cases via
    parameterized.product (empty mapping, empty language list, exact match
    preferred over a regional variant, regional variant used as fallback).

    NOTE(review): this copy of the patch had lost its line breaks and
    leading whitespace. Line structure was rebuilt from the hunk headers
    and the diffstat; the indentation of unchanged context lines (notably
    `yield session`) is inferred, so confirm it against the tree or apply
    with `git apply --ignore-whitespace`. The From: header carries no
    e-mail address in this copy -- confirm before using `git am`.

 rock_paper_sand/wikidata.py      | 24 +++++++++++++++-
 rock_paper_sand/wikidata_test.py | 47 +++++++++++++++++++++++++++++++-
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/rock_paper_sand/wikidata.py b/rock_paper_sand/wikidata.py
index cf10a12..905b1a0 100644
--- a/rock_paper_sand/wikidata.py
+++ b/rock_paper_sand/wikidata.py
@@ -14,7 +14,7 @@
 """Code that uses Wikidata's APIs."""
 
 import collections
-from collections.abc import Generator, Iterable, Sequence, Set
+from collections.abc import Generator, Iterable, Mapping, Sequence, Set
 import contextlib
 import dataclasses
 import datetime
@@ -71,6 +71,28 @@ def requests_session() -> Generator[requests.Session, None, None]:
         yield session
 
 
+def _language_keyed_string(
+    mapping: Mapping[str, Any],
+    languages: Sequence[str],
+) -> str | None:
+    # https://doc.wikimedia.org/Wikibase/master/php/docs_topics_json.html#json_fingerprint
+    for language in languages:
+        if language in mapping:
+            return mapping[language]["value"]
+        for other_language, record in mapping.items():
+            if other_language.startswith(f"{language}-"):
+                return record["value"]
+    return None
+
+
+def _label(item: Any, languages: Sequence[str]) -> str | None:
+    return _language_keyed_string(item["labels"], languages)
+
+
+def _description(item: Any, languages: Sequence[str]) -> str | None:
+    return _language_keyed_string(item["descriptions"], languages)
+
+
 def _truthy_statements(
     item: Any, prop: wikidata_value.Property
 ) -> Sequence[Any]:
diff --git a/rock_paper_sand/wikidata_test.py b/rock_paper_sand/wikidata_test.py
index 302d333..77580fb 100644
--- a/rock_paper_sand/wikidata_test.py
+++ b/rock_paper_sand/wikidata_test.py
@@ -14,7 +14,7 @@
 
 # pylint: disable=missing-module-docstring
 
-from collections.abc import Collection, Mapping, Sequence, Set
+from collections.abc import Callable, Collection, Mapping, Sequence, Set
 import datetime
 from typing import Any
 from unittest import mock
@@ -318,6 +318,51 @@ def test_related_media_error(self) -> None:
 class WikidataUtilsTest(parameterized.TestCase):
     # pylint: disable=protected-access
 
+    @parameterized.product(
+        (
+            dict(function=wikidata._label, section="labels"),
+            dict(function=wikidata._description, section="descriptions"),
+        ),
+        (
+            dict(
+                mapping={},
+                languages=("en",),
+                expected_value=None,
+            ),
+            dict(
+                mapping={"en": {"value": "foo"}},
+                languages=(),
+                expected_value=None,
+            ),
+            dict(
+                mapping={
+                    "en": {"value": "foo"},
+                    "en-us": {"value": "bar"},
+                },
+                languages=("qa", "en"),
+                expected_value="foo",
+            ),
+            dict(
+                mapping={"en-us": {"value": "foo"}},
+                languages=("en",),
+                expected_value="foo",
+            ),
+        ),
+    )
+    def test_language_keyed_string(
+        self,
+        *,
+        function: Callable[[Any, Sequence[str]], str | None],
+        section: str,
+        mapping: Mapping[str, Any],
+        languages: Sequence[str],
+        expected_value: str | None,
+    ) -> None:
+        self.assertEqual(
+            expected_value,
+            function({section: mapping}, languages),
+        )
+
     @parameterized.named_parameters(
         dict(
             testcase_name="preferred",