From e7a4d9ac8e01524a88f47f87b0f14c14f7704e07 Mon Sep 17 00:00:00 2001
From: Tim Sutton
Date: Mon, 16 Jan 2023 22:08:41 +0000
Subject: [PATCH] Fix #88 disable flickr harvest by default

---
 README.md      | 12 ++++++++++++
 fetch_feeds.py | 29 +++++++++++++++++++++--------
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index e991e63c1..e8ee58b6a 100644
--- a/README.md
+++ b/README.md
@@ -221,6 +221,18 @@ There is a github action that will run the tests automatically on PR submission,
 
 See ```.github/workflows/e2e.yml```
 
+## Content Harvesting
+
+You can harvest data from various feeds using the `fetch_feeds.py` script. Flickr
+harvesting is disabled by default; to include it, run the script manually with the
+flag below and review the harvested content before publishing it on our site.
+
+```bash
+./fetch_feeds.py --flickr=yes
+```
+
+The script also runs nightly as a GitHub Action (see `.github/workflows/update-feeds.yml`).
+
 ## Search Functionality
 
 The search functionality uses both [FuseJS](https://fusejs.io/) and [MarkJS](https://markjs.io/). 
diff --git a/fetch_feeds.py b/fetch_feeds.py
index 9c6d8c882..17a424306 100755
--- a/fetch_feeds.py
+++ b/fetch_feeds.py
@@ -5,6 +5,7 @@
 # (c) Tim Sutton, 2023
 
 import requests
+import argparse
 import json
 import shutil
 import os
@@ -168,16 +169,28 @@ def fetch_blog_feed(showcase_type, rss_url):
         print(f"Writing: {image_filename}")
         del response
 
+
+parser = argparse.ArgumentParser(description='Import items from various feeds.')
+# --flickr stays a "yes"/"no" string: type=bool would treat any non-empty value (even "no") as True.
+parser.add_argument(
+    "--flickr",
+    help="Import flickr items (defaults to no)",
+    default="no",
+    choices=("yes", "no"),
+    required=False)
+args = parser.parse_args()
+
 fetch_funders()
 
-fetch_flickr_screenshots(
-    showcase_type="map",
-    rss_url = "https://api.flickr.com/services/feeds/groups_pool.gne?id=2244553@N22&lang=en-us&format=atom"
-)
-fetch_flickr_screenshots(
-    showcase_type="screenshot",
-    rss_url = "https://api.flickr.com/services/feeds/groups_pool.gne?id=2327386@N22&lang=en-us&format=atom"
-)
+if args.flickr == "yes":
+    fetch_flickr_screenshots(
+        showcase_type="map",
+        rss_url = "https://api.flickr.com/services/feeds/groups_pool.gne?id=2244553@N22&lang=en-us&format=atom"
+    )
+    fetch_flickr_screenshots(
+        showcase_type="screenshot",
+        rss_url = "https://api.flickr.com/services/feeds/groups_pool.gne?id=2327386@N22&lang=en-us&format=atom"
+    )
 
 # Planet blog aggregator
 fetch_blog_feed(