
Commit 300f72c

Merge pull request #1467 from haohangyan/elsevier_client
Fixed connection error in download_article
2 parents eef6017 + 463dade

File tree: 1 file changed (+43, -28 lines)


indra/literature/elsevier_client.py

+43 -28
@@ -106,7 +106,7 @@ def check_entitlement(doi):
 
 
 @_ensure_api_keys('download article')
-def download_article(id_val, id_type='doi', on_retry=False):
+def download_article(id_val, id_type='doi', max_retries=2, on_retry=False):
     """Low level function to get an XML article for a particular id.
 
     Parameters
@@ -115,6 +115,8 @@ def download_article(id_val, id_type='doi', on_retry=False):
         The value of the id.
     id_type : str
         The type of id, such as pmid (a.k.a. pubmed_id), doi, or eid.
+    max_retries : int
+        The maximum number of retries for connection errors.
     on_retry : bool
         This function has a recursive retry feature, and this is the only time
         this parameter should be used.
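For reference, a call exercising the newly documented parameter might look like the sketch below; the module path follows the file being patched, but the DOI value is a made-up placeholder and not taken from this commit.

from indra.literature import elsevier_client

# Hypothetical usage of the patched function; the DOI is a placeholder.
# Retry transient connection errors up to three times before giving up.
xml_str = elsevier_client.download_article(
    '10.1016/j.placeholder.2024.001', id_type='doi', max_retries=3)
if xml_str is None:
    print('Article could not be retrieved')

The implementation change itself follows in the next hunk.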
@@ -128,33 +130,46 @@
         id_type = 'pubmed_id'
     url = '%s/%s' % (elsevier_article_url_fmt % id_type, id_val)
     params = {'httpAccept': 'text/xml'}
-    res = requests.get(url, params, headers=ELSEVIER_KEYS)
-    if res.status_code == 404:
-        logger.info("Resource for %s not available on elsevier." % url)
-        return None
-    elif res.status_code == 429:
-        if not on_retry:
-            logger.warning("Broke the speed limit. Waiting half a second then "
-                           "trying again...")
-            sleep(0.5)
-            return download_article(id_val, id_type, True)
-        else:
-            logger.error("Still breaking speed limit after waiting.")
-            logger.error("Elsevier response: %s" % res.text)
-            return None
-    elif res.status_code != 200:
-        logger.error('Could not download article %s: status code %d' %
-                     (url, res.status_code))
-        logger.error('Elsevier response: %s' % res.text)
-        return None
-    else:
-        content_str = res.content.decode('utf-8')
-        if content_str.startswith('<service-error>'):
-            logger.error('Got a service error with 200 status: %s'
-                         % content_str)
-            return None
-        # Return the XML content as a unicode string, assuming UTF-8 encoding
-        return content_str
+
+
+    for attempt in range(max_retries):
+        try:
+            res = requests.get(url, params=params, headers=ELSEVIER_KEYS)
+            if res.status_code == 200:
+                content_str = res.content.decode('utf-8')
+                if content_str.startswith('<service-error>'):
+                    logger.error('Got a service error with 200 status: %s'
+                                 % content_str)
+                    return None
+                return content_str  # Successfully retrieved article
+
+            elif res.status_code == 404:
+                logger.info("Resource for %s not available on elsevier." % url)
+                return None
+
+            elif res.status_code == 429:
+                if not on_retry:
+                    logger.warning(
+                        "Broke the speed limit. Waiting half a second then "
+                        "trying again...")
+                    sleep(0.5)
+                    return download_article(id_val, id_type, on_retry=True)
+                else:
+                    logger.error("Still breaking speed limit after waiting.")
+                    logger.error('Elsevier response: %s' % res.text)
+                    return None
+
+            else:
+                logger.error(f"Elsevier API error {res.status_code}: {res.text}")
+                return None
+
+        except requests.exceptions.ConnectionError as e:
+            wait_time = 2 ** attempt
+            logger.warning(f"Connection error: {e}. Retrying in {wait_time} seconds...")
+            sleep(wait_time)
+
+    logger.error("Max retries exceeded. Could not fetch article.")
+    return None
 
 
 def download_article_from_ids(**id_dict):
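The substance of the change is that the single requests.get call is now wrapped in a bounded loop that catches requests.exceptions.ConnectionError and backs off exponentially between attempts, while the existing 404, 429, and service-error handling is preserved inside the loop. A minimal standalone sketch of that retry pattern (fetch_with_retries is an illustrative name, not the indra function itself) is:

import logging
from time import sleep

import requests

logger = logging.getLogger(__name__)


def fetch_with_retries(url, params=None, headers=None, max_retries=2):
    """Sketch of the pattern used in the patch: GET a URL, retrying
    connection errors with exponential backoff before giving up."""
    for attempt in range(max_retries):
        try:
            return requests.get(url, params=params, headers=headers)
        except requests.exceptions.ConnectionError as e:
            wait_time = 2 ** attempt  # 1 s, 2 s, 4 s, ... between attempts
            logger.warning("Connection error: %s. Retrying in %d seconds...",
                           e, wait_time)
            sleep(wait_time)
    logger.error("Max retries exceeded.")
    return None

With the new default of max_retries=2, download_article makes two attempts, sleeping 1 s after the first connection failure and 2 s after the second, and returns None if both fail.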
