Skip to content

Commit

Permalink
Update Science Museum ingester with API changes (#4105)
Browse files Browse the repository at this point in the history
* Update ScienceMuseum ingester to use new API

* Update tests

* Update url format

* Fix urls in tests

* Add filesize
  • Loading branch information
stacimc authored Apr 17, 2024
1 parent c9c72fd commit b5a1ae0
Show file tree
Hide file tree
Showing 12 changed files with 2,475 additions and 4,253 deletions.
46 changes: 27 additions & 19 deletions catalog/dags/providers/provider_api_scripts/science_museum.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,21 +115,22 @@ def get_record_data(self, record):
):
return None

title = attributes.get("summary_title")
title = ScienceMuseumDataIngester._get_first_list_value("title", attributes)
creator = self._get_creator_info(attributes)
metadata = self._get_metadata(attributes)
images = []
for image_data in multimedia:
if not (foreign_identifier := image_data.get("admin", {}).get("uid")):
if not (foreign_identifier := image_data.get("@admin", {}).get("uid")):
continue
processed = image_data.get("processed")
processed = image_data.get("@processed")
if not isinstance(processed, dict):
continue
(
url,
height,
width,
filetype,
filesize,
) = self._get_image_info(processed)
if not url:
continue
Expand All @@ -144,6 +145,7 @@ def get_record_data(self, record):
"height": height,
"width": width,
"filetype": filetype,
"filesize": filesize,
"license_info": license_info,
"creator": creator,
"title": title,
Expand All @@ -154,22 +156,18 @@ def get_record_data(self, record):

@staticmethod
def _get_creator_info(attributes):
creator_info = None
if (life_cycle := attributes.get("lifecycle")) is not None:
creation = life_cycle.get("creation")
if isinstance(creation, list):
maker = creation[0].get("maker")
if isinstance(maker, list):
creator_info = maker[0].get("summary_title")
return creator_info
if not (maker := attributes.get("creation", {}).get("maker", [])):
return None

return maker[0].get("summary", {}).get("title", None)

@staticmethod
def check_url(url: str | None) -> str | None:
if not url:
return None
if url.startswith("http"):
return url
return f"https://coimages.sciencemuseumgroup.org.uk/images/{url}"
return f"https://coimages.sciencemuseumgroup.org.uk/{url}"

@staticmethod
def _get_dimensions(image_data: dict) -> tuple[int | None, int | None]:
Expand All @@ -191,15 +189,25 @@ def _get_dimensions(image_data: dict) -> tuple[int | None, int | None]:
@staticmethod
def _get_image_info(
processed: dict,
) -> tuple[str | None, int | None, int | None, str | None]:
height, width, filetype = None, None, None
) -> tuple[str | None, int | None, int | None, str | None, int | None]:
height, width, filetype, filesize = None, None, None, None
image_data = processed.get("large") or processed.get("medium", {})

url = ScienceMuseumDataIngester.check_url(image_data.get("location"))
if url:
filetype = image_data.get("format")
height, width = ScienceMuseumDataIngester._get_dimensions(image_data)
return url, height, width, filetype

if not (
filesize := int(
image_data.get("measurements", {})
.get("filesize", {})
.get("value", 0)
)
):
filesize = None

return url, height, width, filetype, filesize

@staticmethod
def _get_first_list_value(key: str, attributes: dict) -> str | None:
Expand All @@ -214,7 +222,7 @@ def _get_metadata(attributes):
for attr_key, metadata_key in [
("identifier", "accession number"),
("name", "name"),
("categories", "category"),
("category", "category"),
("description", "description"),
]:
val = ScienceMuseumDataIngester._get_first_list_value(attr_key, attributes)
Expand All @@ -223,7 +231,7 @@ def _get_metadata(attributes):

creditline = attributes.get("legal")
if isinstance(creditline, dict):
line = creditline.get("credit_line")
line = creditline.get("credit")
if line is not None:
metadata["creditline"] = line

Expand All @@ -233,9 +241,9 @@ def _get_metadata(attributes):
def _get_license_info(image_data) -> LicenseInfo | None:
# some items do not return license anywhere, but in the UI
# they look like CC
rights = image_data.get("source", {}).get("legal", {}).get("rights")
rights = image_data.get("legal", {}).get("rights")
if isinstance(rights, list):
license_name = rights[0].get("usage_terms")
license_name = rights[0].get("licence")
if not license_name:
return None
license_name = license_name.lower()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
{
"large": {
"resizable": true,
"@type": "image",
"format": "jpeg",
"modified": 1472014477000,
"location": "3/563/large_1999_0299_0001__0002_.jpg",
"location_is_relative": true,
"measurements": {
"filesize": {
"units": "bytes",
"value": 58772
},
"dimensions": [
{
"dimension": "height",
Expand All @@ -16,9 +23,6 @@
"value": 1536
}
]
},
"modified": 1472014477000,
"resizable": true,
"type": "image"
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"measurements": {
"filesize": {
"units": "bytes",
"value": 58772
},
"dimensions": [
{
"dimension": "height",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
{
"medium": {
"resizable": true,
"@type": "image",
"format": "jpeg",
"modified": 1472014477000,
"location": "3/563/medium_1999_0299_0001__0002_.jpg",
"location_is_relative": true,
"measurements": {
Expand All @@ -16,9 +19,6 @@
"value": 866
}
]
},
"modified": 1472014477000,
"resizable": true,
"type": "image"
}
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"accession number": "1999-299/1",
"category": "SCM - Smoking",
"creditline": "McPhail, Mary",
"description": "Packet of 10 'Gold Flake' cigarettes by W D & HO Wills, England, 1920-1950",
"name": "cigarette packet"
"accession number": "A67864",
"category": "SCM - Classical & Medieval Medicine",
"description": "Small bronze amulet, possibly phallic, or stomach, probably Roman, from Italy, 200BC-200AD",
"name": "amulet",
"creditline": "Arte Antica e Moderna"
}
Loading

0 comments on commit b5a1ae0

Please sign in to comment.