@@ -106,7 +106,7 @@ def check_entitlement(doi):
 @_ensure_api_keys('download article')
-def download_article(id_val, id_type='doi', on_retry=False):
+def download_article(id_val, id_type='doi', max_retries=2, on_retry=False):
     """Low level function to get an XML article for a particular id.
 
     Parameters
@@ -115,6 +115,8 @@ def download_article(id_val, id_type='doi', on_retry=False):
         The value of the id.
     id_type : str
         The type of id, such as pmid (a.k.a. pubmed_id), doi, or eid.
+    max_retries : int
+        The maximum number of retries for connection errors.
     on_retry : bool
         This function has a recursive retry feature, and this is the only time
         this parameter should be used.
@@ -128,33 +130,46 @@ def download_article(id_val, id_type='doi', on_retry=False):
         id_type = 'pubmed_id'
     url = '%s/%s' % (elsevier_article_url_fmt % id_type, id_val)
     params = {'httpAccept': 'text/xml'}
-    res = requests.get(url, params, headers=ELSEVIER_KEYS)
-    if res.status_code == 404:
-        logger.info("Resource for %s not available on elsevier." % url)
-        return None
-    elif res.status_code == 429:
-        if not on_retry:
-            logger.warning("Broke the speed limit. Waiting half a second then "
-                           "trying again...")
-            sleep(0.5)
-            return download_article(id_val, id_type, True)
-        else:
-            logger.error("Still breaking speed limit after waiting.")
-            logger.error("Elsevier response: %s" % res.text)
-            return None
-    elif res.status_code != 200:
-        logger.error('Could not download article %s: status code %d' %
-                     (url, res.status_code))
-        logger.error('Elsevier response: %s' % res.text)
-        return None
-    else:
-        content_str = res.content.decode('utf-8')
-        if content_str.startswith('<service-error>'):
-            logger.error('Got a service error with 200 status: %s'
-                         % content_str)
-            return None
-        # Return the XML content as a unicode string, assuming UTF-8 encoding
-        return content_str
+
+
+    for attempt in range(max_retries):
+        try:
+            res = requests.get(url, params=params, headers=ELSEVIER_KEYS)
+            if res.status_code == 200:
+                content_str = res.content.decode('utf-8')
+                if content_str.startswith('<service-error>'):
+                    logger.error('Got a service error with 200 status: %s'
+                                 % content_str)
+                    return None
+                return content_str  # Successfully retrieved article
+
+            elif res.status_code == 404:
+                logger.info("Resource for %s not available on elsevier." % url)
+                return None
+
+            elif res.status_code == 429:
+                if not on_retry:
+                    logger.warning(
+                        "Broke the speed limit. Waiting half a second then "
+                        "trying again...")
+                    sleep(0.5)
+                    return download_article(id_val, id_type, max_retries, on_retry=True)
+                else:
+                    logger.error("Still breaking speed limit after waiting.")
+                    logger.error('Elsevier response: %s' % res.text)
+                    return None
+
+            else:
+                logger.error(f"Elsevier API error {res.status_code}: {res.text}")
+                return None
+
+        except requests.exceptions.ConnectionError as e:
+            wait_time = 2 ** attempt
+            logger.warning(f"Connection error: {e}. Retrying in {wait_time} seconds...")
+            sleep(wait_time)
+
+    logger.error("Max retries exceeded. Could not fetch article.")
+    return None
 
 
 def download_article_from_ids(**id_dict):
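
With the default max_retries=2, a dropped connection is retried after 1 second and then 2 seconds (2 ** attempt) before the function gives up and returns None. A minimal usage sketch of the updated function follows; the import path and the DOI are illustrative assumptions, not part of this change.

```python
# Minimal usage sketch, assuming the function lives in
# indra.literature.elsevier_client and that Elsevier API keys are configured.
from indra.literature.elsevier_client import download_article

# The DOI below is purely illustrative.
xml_str = download_article('10.1016/j.example.2020.01.001', id_type='doi',
                           max_retries=3)
if xml_str is None:
    print('Article could not be retrieved.')
else:
    print('Retrieved %d characters of XML.' % len(xml_str))
```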