Skip to content
This repository has been archived by the owner on Mar 1, 2023. It is now read-only.

Commit

Permalink
More retry logic for Crossref
Browse files: browse the repository at this point in the history
  • Loading branch information
rabdill committed Jul 1, 2019
1 parent 16ae610 commit f6eb529
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion spider/spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,10 @@ def get_posted_dates(self):
self.record_article_posted_date(x[0], x[1])

def _pull_crossref_data_date(self, datestring, retry=True):
time.sleep(6)
# Datestring should be format YYYY-MM-DD
self.log.record(f"Beginning retrieval of Crossref data for {datestring}", "info")


headers = {'user-agent': config.user_agent}
try:
r = requests.get("{0}?obj-id.prefix=10.1101&from-occurred-date={1}&until-occurred-date={1}&source=twitter&mailto={2}&rows=10000".format(config.crossref["endpoints"]["events"], datestring, config.crossref["parameters"]["email"]), headers=headers)
Expand All @@ -136,14 +136,20 @@ def _pull_crossref_data_date(self, datestring, retry=True):

if r.status_code != 200:
self.log.record(f"Got weird status code: {r.status_code}", "error")
if retry:
return self._pull_crossref_data_date(datestring, retry=False)
return
results = r.json()

if results["status"] != "ok":
self.log.record(f'Crossref responded, but with unexpected status: {results["status"]}', "error")
if retry:
return self._pull_crossref_data_date(datestring, retry=False)
return
if "message" not in results.keys() or "events" not in results["message"].keys() or len(results["message"]["events"]) == 0:
self.log.record("Events not found in response.", "error")
if retry:
return self._pull_crossref_data_date(datestring, retry=False)
return

tweets = defaultdict(list)
Expand Down

0 comments on commit f6eb529

Please sign in to comment.