Skip to content

Commit

Permalink
Fetch and populate place coordinates from Wikipedia (#1136)
Browse files Browse the repository at this point in the history
  • Loading branch information
bartfeenstra authored Jan 7, 2024
1 parent 932b0ed commit 5a6e70b
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 14 deletions.
4 changes: 0 additions & 4 deletions betty/extension/demo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from contextlib import AsyncExitStack

from geopy import Point

from betty import load, generate
from betty.app import App
from betty.app.extension import Extension
Expand Down Expand Up @@ -103,7 +101,6 @@ async def load(self) -> None:
locale='uk',
),
],
coordinates=Point(52.366667, 4.9),
links={Link('https://nl.wikipedia.org/wiki/Amsterdam')},
)
self._load(Enclosure(encloses=amsterdam, enclosed_by=north_holland))
Expand All @@ -118,7 +115,6 @@ async def load(self) -> None:
locale='uk',
),
],
coordinates=Point(52.465556, 4.951111),
links={Link('https://nl.wikipedia.org/wiki/Ilpendam')},
)
self._load(Enclosure(encloses=ilpendam, enclosed_by=north_holland))
Expand Down
68 changes: 67 additions & 1 deletion betty/tests/test_wikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import aiohttp
import pytest
from aiofiles.tempfile import TemporaryDirectory
from geopy import Point
from pytest_mock import MockerFixture

from betty.media_type import MediaType
Expand All @@ -22,7 +23,7 @@
from aioresponses import aioresponses

from betty.app import App
from betty.model.ancestry import Source, Link, Citation
from betty.model.ancestry import Source, Link, Citation, Place
from betty.wikipedia import Entry, _Retriever, NotAnEntryError, _parse_url, RetrievalError, _Populator


Expand Down Expand Up @@ -238,6 +239,53 @@ async def test_get_entry_with_client_error_should_raise_retrieval_error(
with pytest.raises(RetrievalError):
await retriever.get_entry(entry_language, entry_name)

@pytest.mark.parametrize('expected, response_pages_json', [
    # A page without any coordinates.
    (None, {}),
    # Almelo.
    (
        Point(52.35, 6.66666667),
        {
            'coordinates': [
                {
                    'lat': 52.35,
                    'lon': 6.66666667,
                    'primary': True,
                    'globe': 'earth',
                },
            ],
        },
    ),
    # Tranquility Base.
    (
        None,
        {
            'coordinates': [
                {
                    'lat': 0.6875,
                    'lon': 23.43333333,
                    'primary': True,
                    'globe': 'moon',
                },
            ],
        },
    ),
])
async def test_get_place_coordinates_should_return(
    self,
    expected: Point | None,
    response_pages_json: dict[str, Any],
    aioresponses: aioresponses,
    mocker: MockerFixture,
) -> None:
    """
    Check that ``_Retriever.get_place_coordinates()`` returns the expected value for a mocked API response.

    Each parameter set supplies the single page object that the mocked
    coordinates query returns, together with the value the retriever is
    expected to produce for it.
    """
    mocker.patch('sys.stderr')
    entry_language = 'en'
    entry_name = 'Amsterdam'
    api_url = f'https://{entry_language}.wikipedia.org/w/api.php?action=query&titles={entry_name}&prop=coordinates&coprimary=primary&format=json&formatversion=2'
    # Serve the parametrized page as the only page in the query result.
    aioresponses.get(api_url, payload={
        'query': {
            'pages': [response_pages_json],
        },
    })
    async with TemporaryDirectory() as cache_directory_path_str, aiohttp.ClientSession() as session:
        sut = _Retriever(session, Path(cache_directory_path_str))
        actual = await sut.get_place_coordinates(entry_language, entry_name)
        assert expected == actual


class TestPopulator:
@patch_cache
Expand Down Expand Up @@ -473,3 +521,21 @@ async def test_populate_should_add_translation_links(self, mocker: MockerFixture
assert MediaType('text/html') == link_nl.media_type
assert link_nl.description is not None
assert 'external' == link_nl.relationship

@patch_cache
async def test_populate_place_should_add_coordinates(self, mocker: MockerFixture) -> None:
    """
    Check that populating a place linking to a Wikipedia entry assigns the retriever's coordinates to it.
    """
    entry_language = 'en'
    entry_name = 'Almelo'
    expected = Point(52.35, 6.66666667)

    # Replace the retriever so no real lookup is performed.
    m_retriever = mocker.patch('betty.wikipedia._Retriever', spec=_Retriever, new_callable=AsyncMock)
    m_retriever.get_place_coordinates.return_value = expected

    place = Place(links={
        Link(f'https://{entry_language}.wikipedia.org/wiki/{entry_name}'),
    })
    app = App()
    async with app:
        app.project.ancestry.add(place)
        await _Populator(app, m_retriever).populate()

    # The very object returned by the retriever must end up on the place.
    assert expected is place.coordinates
57 changes: 48 additions & 9 deletions betty/wikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@

import aiofiles
import aiohttp
from geopy import Point

from betty.app import App
from betty.asyncio import gather
from betty.functools import filter_suppress
from betty.locale import Localized, negotiate_locale, to_locale, get_data, LocaleNotFoundError, Localey
from betty.media_type import MediaType
from betty.model import Entity
from betty.model.ancestry import Link, HasLinks
from betty.model.ancestry import Link, HasLinks, Place


class WikipediaError(BaseException):
Expand Down Expand Up @@ -116,8 +117,7 @@ async def _get_page_data(self, url: str) -> Any:
raise RetrievalError('Could not successfully parse the JSON format returned by %s: %s' % (url, e))

async def get_translations(self, entry_language: str, entry_name: str) -> dict[str, str]:
url = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=langlinks&lllimit=500&format=json&formatversion=2' % (
entry_language, entry_name)
url = f'https://{entry_language}.wikipedia.org/w/api.php?action=query&titles={entry_name}&prop=langlinks&lllimit=500&format=json&formatversion=2'
page_data = await self._get_page_data(url)
try:
translations_data = page_data['langlinks']
Expand All @@ -127,14 +127,28 @@ async def get_translations(self, entry_language: str, entry_name: str) -> dict[s
return {translation_data['lang']: translation_data['title'] for translation_data in translations_data}

async def get_entry(self, language: str, name: str) -> Entry:
    """
    Retrieve a Wikipedia entry's title and introductory extract.

    :param language: The language code used as the Wikipedia subdomain.
    :param name: The entry name, placed in the ``titles`` query parameter as-is.
    :return: An ``Entry`` built from the ``title`` and ``extract`` keys of the page data.
    :raises RetrievalError: If the page data lacks the ``title`` or ``extract`` key.
    """
    # The URL is built exactly once; the key error below reports it verbatim.
    url = f'https://{language}.wikipedia.org/w/api.php?action=query&titles={name}&prop=extracts&exintro&format=json&formatversion=2'
    page_data = await self._get_page_data(url)
    try:
        return Entry(language, name, page_data['title'], page_data['extract'])
    except KeyError as e:
        # Chain the original error so the missing key stays visible in tracebacks.
        raise RetrievalError('Could not successfully parse the JSON content returned by %s: %s' % (url, e)) from e

async def get_place_coordinates(self, language: str, name: str) -> Point | None:
    """
    Retrieve the primary coordinates of a Wikipedia entry, if it has any on Earth.

    :param language: The language code used as the Wikipedia subdomain.
    :param name: The entry name, placed in the ``titles`` query parameter as-is.
    :return: The coordinates as a ``Point``, or ``None`` if the page data has no
        coordinates or they are on a globe other than ``earth``.
    :raises RetrievalError: If a coordinates object lacks the ``globe``, ``lat``, or ``lon`` key.
    """
    url = f'https://{language}.wikipedia.org/w/api.php?action=query&titles={name}&prop=coordinates&coprimary=primary&format=json&formatversion=2'
    page_data = await self._get_page_data(url)
    try:
        coordinates = page_data['coordinates'][0]
    except (KeyError, IndexError):
        # There may not be any coordinates: the key can be absent, or the list can be empty.
        return None
    try:
        # Coordinates on other celestial bodies cannot be used to locate a place on Earth.
        if coordinates['globe'] != 'earth':
            return None
        return Point(coordinates['lat'], coordinates['lon'])
    except KeyError as e:
        # Chain the original error so the missing key stays visible in tracebacks.
        raise RetrievalError('Could not successfully parse the JSON content returned by %s: %s' % (url, e)) from e


class _Populator:
def __init__(self, app: App, retriever: _Retriever):
Expand All @@ -150,11 +164,15 @@ async def populate(self) -> None:
))

async def _populate_entity(self, entity: Entity, locales: set[str]) -> None:
    """
    Populate a single entity with whatever Wikipedia data applies to its type.

    The two checks are independent of each other: an entity is populated as a
    ``HasLinks``, as a ``Place``, as both, or as neither.

    :param entity: The entity to populate.
    :param locales: The locales passed on to the link population.
    """
    if isinstance(entity, HasLinks):
        await self._populate_has_links(entity, locales)

    if isinstance(entity, Place):
        await self._populate_place(entity)

async def _populate_has_links(self, has_links: HasLinks, locales: set[str]) -> None:
entry_links: set[tuple[str, str]] = set()
for link in entity.links:
for link in has_links.links:
try:
entry_locale, entry_name = _parse_url(link.url)
except NotAnEntryError:
Expand Down Expand Up @@ -192,7 +210,7 @@ async def _populate_entity(self, entity: Entity, locales: set[str]) -> None:
continue
added_link = Link(added_entry.url)
await self.populate_link(added_link, added_entry_locale, added_entry)
entity.links.add(added_link)
has_links.links.add(added_link)
entry_links.add((added_entry_locale, added_entry_name))

async def populate_link(self, link: Link, entry_locale: str, entry: Entry | None = None) -> None:
Expand All @@ -210,3 +228,24 @@ async def populate_link(self, link: Link, entry_locale: str, entry: Entry | None
link.description = self._app.localizers.get_negotiated(link.locale)._('Read more on Wikipedia.')
if entry is not None and link.label is None:
link.label = entry.title

async def _populate_place(self, place: Place) -> None:
    """
    Populate a place with data from Wikipedia.

    At present this only populates the place's coordinates.

    :param place: The place to populate.
    """
    await self._populate_place_coordinates(place)

async def _populate_place_coordinates(self, place: Place) -> None:
    """
    Set a place's coordinates from the first of its Wikipedia links that provides them.

    Places that already have coordinates are left untouched. Links that are not
    Wikipedia entries, whose locale is unknown, or whose retrieval fails are
    skipped.

    :param place: The place whose ``coordinates`` may be set.
    """
    if place.coordinates:
        return

    for link in place.links:
        try:
            entry_locale, entry_name = _parse_url(link.url)
        except NotAnEntryError:
            continue
        try:
            get_data(entry_locale)
        except LocaleNotFoundError:
            continue
        try:
            coordinates = await self._retriever.get_place_coordinates(entry_locale, entry_name)
        except RetrievalError:
            # Retrieval is best-effort; another link may still succeed.
            continue
        if coordinates is not None:
            # Stop at the first hit so that a later link without coordinates cannot
            # overwrite this value, and so that no needless requests are made.
            place.coordinates = coordinates
            return

0 comments on commit 5a6e70b

Please sign in to comment.