Skip to content

Commit

Permalink
Merge pull request #15 from kingsdigitallab/v0.1.0
Browse files Browse the repository at this point in the history
feat(project): change download_workflow to download thumbnails instea…
  • Loading branch information
kallewesterling authored Mar 24, 2023
2 parents f333989 + 48968b0 commit c2d74b7
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
14 changes: 14 additions & 0 deletions tests/test_project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pytest

from zoonyper.project import Project


class TestProject:
    """Unit tests for :class:`zoonyper.project.Project`."""

    def setup_method(self):
        """Build a default ``Project`` instance (pytest per-test setup hook)."""
        self.project = Project()

    def test_get_thumbnail_url(self):
        """Check how ``get_thumbnail_url`` rewrites (or leaves alone) image URLs."""
        base_url = self.project.thumbnails_url

        # An empty URL is passed through untouched.
        assert self.project.get_thumbnail_url("") == ""

        # A leading http:// or https:// scheme is swapped for the base URL.
        assert (
            self.project.get_thumbnail_url("http://image.url")
            == f"{base_url}image.url"
        )
        assert (
            self.project.get_thumbnail_url("https://image.url")
            == f"{base_url}image.url"
        )

        # Everything after the scheme (host and path) is kept as-is.
        assert (
            self.project.get_thumbnail_url("https://image.url/a/b.jpg")
            == f"{base_url}image.url/a/b.jpg"
        )

        # Other schemes are not rewritten.
        assert (
            self.project.get_thumbnail_url("ftp://image.url")
            == "ftp://image.url"
        )
28 changes: 27 additions & 1 deletion zoonyper/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import json
import os
import random
import re
import requests
import time

Expand Down Expand Up @@ -56,6 +57,9 @@ class Project(Utils):
If specified, a list of column names to be parsed as datetime objects
when reading the CSV files. The default value is "%Y-%m-%d", which
will parse columns named "created_at" and "updated_at".
thumbnails_url : str, optional
Base URL used to download thumbnails. Defaults to
`https://thumbnails.zooniverse.org/100x100/`.
Raises
------
Expand Down Expand Up @@ -103,6 +107,7 @@ def __init__(
redact_users: bool = True,
trim_paths: bool = True,
parse_dates: str = "%Y-%m-%d",
thumbnails_url: str = "https://thumbnails.zooniverse.org/100x100/"
):
"""
Constructor method.
Expand Down Expand Up @@ -168,6 +173,8 @@ def __init__(
self.trim_paths = trim_paths
self.parse_dates = parse_dates

self.thumbnails_url = thumbnails_url

@staticmethod
def _user_logged_in(row: pd.Series) -> bool:
"""
Expand Down Expand Up @@ -707,7 +714,7 @@ def download_workflow(
file_name = url.split("/")[-1]
save_file = Path(current_dir / Path(file_name))
if not save_file.exists():
r = requests.get(url, timeout=timeout)
r = requests.get(self.get_thumbnail_url(url), timeout=timeout)
save_file.write_bytes(r.content)
has_downloaded = True

Expand All @@ -716,6 +723,25 @@ def download_workflow(

return True

def get_thumbnail_url(self, image_url: str) -> str:
    """
    Get the thumbnail URL for the given image URL.

    A leading ``http://`` or ``https://`` scheme is replaced with
    ``self.thumbnails_url``. Any other value (an empty string, or a URL
    that starts with a different scheme) is returned unchanged.

    Parameters
    ----------
    image_url : str
        URL to get the thumbnail URL for.

    Returns
    -------
    str
        Thumbnail URL, or ``image_url`` itself when it is empty or does
        not start with ``http://`` / ``https://``.
    """
    if not image_url:
        return image_url

    # Literal prefix matching rather than ``re.sub``: a regex replacement
    # string is a template, so a backslash in ``self.thumbnails_url``
    # would be expanded as an escape or group reference instead of being
    # inserted verbatim.
    for scheme in ("https://", "http://"):
        if image_url.startswith(scheme):
            return self.thumbnails_url + image_url[len(scheme):]

    return image_url

@property
def inactive_workflow_ids(self) -> list:
"""
Expand Down

0 comments on commit c2d74b7

Please sign in to comment.