use load_premium_users() in ScrapePremiumUsersExtension; added SCRAPE_PREMIUM_USERS_CONFIG_DIR setting

MarkusShepherd committed Sep 14, 2023
1 parent e99c68c commit 7bd19ef
Showing 3 changed files with 10 additions and 2 deletions.
1 change: 1 addition & 0 deletions .env.example
@@ -16,6 +16,7 @@ PULL_QUEUE_SUBSCRIPTION_RESPONSES=<pubsub-subscription-responses>
 PULL_QUEUE_INTERVAL=300
 # Scrape premium users
 SCRAPE_PREMIUM_USERS_LIST=
+SCRAPE_PREMIUM_USERS_CONFIG_DIR=
 SCRAPE_PREMIUM_USERS_INTERVAL=1800
 SCRAPE_PREMIUM_USERS_PREVENT_RESCRAPE_FOR=10800
 # AWS credentials if you need access to S3
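
For local runs, these variables might be filled in along the following lines (a hedged example; the user names and directory path are made up and not part of the repository):

SCRAPE_PREMIUM_USERS_LIST=alice,bob
SCRAPE_PREMIUM_USERS_CONFIG_DIR=/path/to/premium-user-configs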
10 changes: 8 additions & 2 deletions board_game_scraper/extensions.py
@@ -16,7 +16,7 @@
 from scrapy.utils.misc import arg_to_iter
 from scrapy_extensions import LoopingExtension

-from .utils import now, pubsub_client
+from .utils import load_premium_users, now, pubsub_client

 LOGGER = logging.getLogger(__name__)

@@ -163,9 +163,15 @@ def from_crawler(cls, crawler):
         if not crawler.settings.getbool("SCRAPE_PREMIUM_USERS_ENABLED"):
             raise NotConfigured

-        premium_users = tuple(
+        premium_users_list = tuple(
             arg_to_iter(crawler.settings.getlist("SCRAPE_PREMIUM_USERS_LIST"))
         )
+        premium_users_from_dir = tuple(
+            load_premium_users(
+                dirs=crawler.settings.get("SCRAPE_PREMIUM_USERS_CONFIG_DIR"),
+            )
+        )
+        premium_users = premium_users_list + premium_users_from_dir

         if not premium_users:
             raise NotConfigured
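
The load_premium_users() helper itself lives in board_game_scraper/utils.py and is not shown in this diff. A minimal sketch of what such a helper could look like, assuming it reads one user name per non-comment line from the files in the configured directory (the file layout and parsing are assumptions, not the repository's actual implementation):

from pathlib import Path

from scrapy.utils.misc import arg_to_iter


def load_premium_users(dirs):
    """Yield premium user names found in the given config directories."""
    # arg_to_iter() accepts None, a single path, or an iterable of paths,
    # so an unset SCRAPE_PREMIUM_USERS_CONFIG_DIR simply yields nothing.
    for directory in arg_to_iter(dirs):
        for path in sorted(Path(directory).glob("*")):
            if not path.is_file():
                continue
            for line in path.read_text(encoding="utf-8").splitlines():
                user = line.strip()
                if user and not user.startswith("#"):
                    yield user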
1 change: 1 addition & 0 deletions board_game_scraper/settings.py
@@ -316,6 +316,7 @@
 # Scrape premium users
 SCRAPE_PREMIUM_USERS_ENABLED = True
 SCRAPE_PREMIUM_USERS_LIST = os.getenv("SCRAPE_PREMIUM_USERS_LIST")
+SCRAPE_PREMIUM_USERS_CONFIG_DIR = os.getenv("SCRAPE_PREMIUM_USERS_CONFIG_DIR")
 SCRAPE_PREMIUM_USERS_INTERVAL = (
     os.getenv("SCRAPE_PREMIUM_USERS_INTERVAL") or 60 * 60  # 1 hour
 )
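
Taken together, a premium user can now come either from the SCRAPE_PREMIUM_USERS_LIST setting or from files under SCRAPE_PREMIUM_USERS_CONFIG_DIR; the extension simply concatenates both sources. A rough usage sketch of that merge, mirroring the from_crawler() logic above (the settings values are made up for illustration):

from scrapy.settings import Settings
from scrapy.utils.misc import arg_to_iter

from board_game_scraper.utils import load_premium_users

settings = Settings({
    "SCRAPE_PREMIUM_USERS_LIST": ["alice", "bob"],
    "SCRAPE_PREMIUM_USERS_CONFIG_DIR": "/path/to/premium-user-configs",
})

# Same combination as in ScrapePremiumUsersExtension.from_crawler()
premium_users = tuple(
    arg_to_iter(settings.getlist("SCRAPE_PREMIUM_USERS_LIST"))
) + tuple(load_premium_users(dirs=settings.get("SCRAPE_PREMIUM_USERS_CONFIG_DIR")))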
