From f6eb529bb3f2b7e1e780494ff9cb3422a3880601 Mon Sep 17 00:00:00 2001 From: Rich Abdill Date: Mon, 1 Jul 2019 11:26:39 -0500 Subject: [PATCH] More retry logic for Crossref --- spider/spider.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/spider/spider.py b/spider/spider.py index 801a900..bb12750 100644 --- a/spider/spider.py +++ b/spider/spider.py @@ -118,10 +118,10 @@ def get_posted_dates(self): self.record_article_posted_date(x[0], x[1]) def _pull_crossref_data_date(self, datestring, retry=True): + time.sleep(6) # Datestring should be format YYYY-MM-DD self.log.record(f"Beginning retrieval of Crossref data for {datestring}", "info") - headers = {'user-agent': config.user_agent} try: r = requests.get("{0}?obj-id.prefix=10.1101&from-occurred-date={1}&until-occurred-date={1}&source=twitter&mailto={2}&rows=10000".format(config.crossref["endpoints"]["events"], datestring, config.crossref["parameters"]["email"]), headers=headers) @@ -136,14 +136,20 @@ def _pull_crossref_data_date(self, datestring, retry=True): if r.status_code != 200: self.log.record(f"Got weird status code: {r.status_code}", "error") + if retry: + return self._pull_crossref_data_date(datestring, retry=False) return results = r.json() if results["status"] != "ok": self.log.record(f'Crossref responded, but with unexpected status: {results["status"]}', "error") + if retry: + return self._pull_crossref_data_date(datestring, retry=False) return if "message" not in results.keys() or "events" not in results["message"].keys() or len(results["message"]["events"]) == 0: self.log.record("Events not found in response.", "error") + if retry: + return self._pull_crossref_data_date(datestring, retry=False) return tweets = defaultdict(list)