@@ -6,9 +6,44 @@
 import requests
 
 
-db_index_url = 'http://physionet.org/physiobank/database/'
+DB_INDEX_URL = 'http://physionet.org/physiobank/database/'
 
 
+def _remote_file_size(url=None, file_name=None, pb_dir=None):
+    """
+    Get the remote file size in bytes
+
+    Parameters
+    ----------
+    url : str, optional
+        The full url of the file. Use this option to explicitly
+        state the full url.
+    file_name : str, optional
+        The base file name. Use this argument along with pb_dir if you
+        want the full url to be constructed.
+    pb_dir : str, optional
+        The physiobank directory. Use this argument along with
+        file_name if you want the full url to be constructed.
+
+    Returns
+    -------
+    remote_file_size : int
+        Size of the file in bytes
+
+    """
+
+    # Option to construct the url
+    if file_name and pb_dir:
+        url = posixpath.join(DB_INDEX_URL, pb_dir, file_name)
+
+    response = requests.head(url, headers={'Accept-Encoding': 'identity'})
+    # Raise HTTPError if invalid url
+    response.raise_for_status()
+
+    # Supposed size of the file
+    remote_file_size = int(response.headers['content-length'])
+
+    return remote_file_size
 
 def _stream_header(file_name, pb_dir):
     """
@@ -25,14 +60,14 @@ def _stream_header(file_name, pb_dir):
 
     """
     # Full url of header location
-    url = posixpath.join(db_index_url, pb_dir, file_name)
-    r = requests.get(url)
+    url = posixpath.join(DB_INDEX_URL, pb_dir, file_name)
+    response = requests.get(url)
 
     # Raise HTTPError if invalid url
-    r.raise_for_status()
+    response.raise_for_status()
 
     # Get each line as a string
-    filelines = r.content.decode('iso-8859-1').splitlines()
+    filelines = response.content.decode('iso-8859-1').splitlines()
 
     # Separate content into header and comment lines
     header_lines = []
@@ -82,7 +117,7 @@ def _stream_dat(file_name, pb_dir, byte_count, start_byte, dtype):
     """
 
     # Full url of dat file
-    url = posixpath.join(db_index_url, pb_dir, file_name)
+    url = posixpath.join(DB_INDEX_URL, pb_dir, file_name)
 
     # Specify the byte range
     end_byte = start_byte + byte_count - 1
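For context, the byte range computed above is what a standard HTTP Range request consumes; a minimal sketch of the pattern (the exact request line in _stream_dat sits outside this diff, and the url below is illustrative):

import requests

url = 'http://physionet.org/physiobank/database/mitdb/100.dat'
start_byte, byte_count = 0, 1024
# HTTP byte ranges are inclusive on both ends, hence the -1
end_byte = start_byte + byte_count - 1
response = requests.get(url, headers={'Range': 'bytes=%d-%d' % (start_byte, end_byte)})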
@@ -114,7 +149,7 @@ def _stream_annotation(file_name, pb_dir):
 
     """
     # Full url of annotation file
-    url = posixpath.join(db_index_url, pb_dir, file_name)
+    url = posixpath.join(DB_INDEX_URL, pb_dir, file_name)
 
     # Get the content
     response = requests.get(url)
@@ -136,10 +171,10 @@ def get_dbs():
     >>> dbs = get_dbs()
 
     """
-    url = posixpath.join(db_index_url, 'DBS')
-    r = requests.get(url)
+    url = posixpath.join(DB_INDEX_URL, 'DBS')
+    response = requests.get(url)
 
-    dbs = r.content.decode('ascii').splitlines()
+    dbs = response.content.decode('ascii').splitlines()
    dbs = [re.sub('\t{2,}', '\t', line).split('\t') for line in dbs]
 
     return dbs
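The re.sub/split pair above relies on the DBS index separating each database slug from its description with a run of tabs; collapsing the run first makes every line split into a two-item list. A hedged illustration with a made-up line:

import re

line = 'mitdb\t\t\tMIT-BIH Arrhythmia Database'  # hypothetical DBS line
re.sub('\t{2,}', '\t', line).split('\t')
# -> ['mitdb', 'MIT-BIH Arrhythmia Database']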
@@ -166,7 +201,7 @@ def get_record_list(db_dir, records='all'):
 
     """
     # Full url physiobank database
-    db_url = posixpath.join(db_index_url, db_dir)
+    db_url = posixpath.join(DB_INDEX_URL, db_dir)
 
     # Check for a RECORDS file
     if records == 'all':
@@ -175,18 +210,18 @@ def get_record_list(db_dir, records='all'):
             raise ValueError('The database %s has no WFDB files to download' % db_url)
 
         # Get each line as a string
-        recordlist = response.content.decode('ascii').splitlines()
+        record_list = response.content.decode('ascii').splitlines()
     # Otherwise the records are input manually
     else:
-        recordlist = records
+        record_list = records
 
-    return recordlist
+    return record_list
 
 
 def get_annotators(db_dir, annotators):
 
     # Full url physiobank database
-    db_url = posixpath.join(db_index_url, db_dir)
+    db_url = posixpath.join(DB_INDEX_URL, db_dir)
 
     if annotators is not None:
         # Check for an ANNOTATORS file
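Usage sketch for get_record_list with the renamed record_list (the database slug is illustrative):

record_list = get_record_list('mitdb')                  # fetch the RECORDS file
record_list = get_record_list('mitdb', ['100', '101'])  # or name records manually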
@@ -197,61 +232,61 @@ def get_annotators(db_dir, annotators):
         else:
             raise ValueError('The database %s has no annotation files to download' % db_url)
         # Make sure the input annotators are present in the database
-        annlist = r.content.decode('ascii').splitlines()
-        annlist = [a.split('\t')[0] for a in annlist]
+        ann_list = r.content.decode('ascii').splitlines()
+        ann_list = [a.split('\t')[0] for a in ann_list]
 
         # Get the annotation file types required
         if annotators == 'all':
             # all possible ones
-            annotators = annlist
+            annotators = ann_list
         else:
             # In case they didn't input a list
             if type(annotators) == str:
                 annotators = [annotators]
             # user input ones. Check validity.
             for a in annotators:
-                if a not in annlist:
+                if a not in ann_list:
                     raise ValueError('The database contains no annotators with extension: %s' % a)
 
     return annotators
 
 
-def make_local_dirs(dl_dir, dlinputs, keep_subdirs):
+def make_local_dirs(dl_dir, dl_inputs, keep_subdirs):
     """
     Make any required local directories to prepare for downloading
     """
 
     # Make the local download dir if it doesn't exist
     if not os.path.isdir(dl_dir):
         os.makedirs(dl_dir)
-        print("Created local base download directory: ", dl_dir)
+        print('Created local base download directory: %s' % dl_dir)
     # Create all required local subdirectories
     # This must be out of dl_pb_file to
     # avoid clash in multiprocessing
     if keep_subdirs:
-        dldirs = set([os.path.join(dl_dir, d[1]) for d in dlinputs])
-        for d in dldirs:
+        dl_dirs = set([os.path.join(dl_dir, d[1]) for d in dl_inputs])
+        for d in dl_dirs:
             if not os.path.isdir(d):
                 os.makedirs(d)
     return
 
 
 def dl_pb_file(inputs):
-    # Download a file from physiobank
-    # The input args are to be unpacked for the use of multiprocessing
+    """
+    Download a file from physiobank.
+
+    The input args are to be unpacked for the use of multiprocessing
+    map, because python2 doesn't have starmap...
+
+    """
 
     basefile, subdir, db, dl_dir, keep_subdirs, overwrite = inputs
 
     # Full url of file
-    url = posixpath.join(db_index_url, db, subdir, basefile)
-
-    # Send a head request
-    response = requests.head(url, headers={'Accept-Encoding': 'identity'})
-    # Raise HTTPError if invalid url
-    response.raise_for_status()
+    url = posixpath.join(DB_INDEX_URL, db, subdir, basefile)
 
     # Supposed size of the file
-    remote_file_size = int(response.headers['content-length'])
+    remote_file_size = _remote_file_size(url)
 
     # Figure out where the file should be locally
     if keep_subdirs:
@@ -276,7 +311,7 @@ def dl_pb_file(inputs):
         r = requests.get(url, headers=headers, stream=True)
         print('headers: ', headers)
         print('r content length: ', len(r.content))
-        with open(local_file, "ba") as writefile:
+        with open(local_file, 'ba') as writefile:
             writefile.write(r.content)
         print('Done appending.')
     # Local file is larger than it should be. Redownload.
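The branch above resumes a partial download: it fetches only the missing bytes and appends them in 'ba' (binary append) mode. A standalone sketch of that pattern, assuming headers carries a Range header built from the local file size (the construction of headers is outside this diff, and the paths are illustrative):

import os
import requests

local_file = '100.dat'
url = 'http://physionet.org/physiobank/database/mitdb/100.dat'
local_file_size = os.path.getsize(local_file)
# Ask the server for everything from the current end of the local file onward
headers = {'Range': 'bytes=%d-' % local_file_size, 'Accept-Encoding': 'identity'}
r = requests.get(url, headers=headers, stream=True)
with open(local_file, 'ba') as writefile:  # append the tail to the partial file
    writefile.write(r.content)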
@@ -304,7 +339,7 @@ def dl_full_file(url, save_file_name):
 
     """
     response = requests.get(url)
-    with open(save_file_name, "wb") as writefile:
+    with open(save_file_name, 'wb') as writefile:
         writefile.write(response.content)
 
     return
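dl_full_file is the unconditional counterpart to the resuming logic above; usage is a one-liner (url and destination are illustrative):

dl_full_file('http://physionet.org/physiobank/database/mitdb/100.hea', '100.hea')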
@@ -346,22 +381,22 @@ def dl_files(db, dl_dir, files, keep_subdirs=True, overwrite=False):
     """
 
     # Full url physiobank database
-    db_url = posixpath.join(db_index_url, db)
+    db_url = posixpath.join(DB_INDEX_URL, db)
     # Check if the database is valid
-    r = requests.get(db_url)
-    r.raise_for_status()
+    response = requests.get(db_url)
+    response.raise_for_status()
 
     # Construct the urls to download
-    dlinputs = [(os.path.split(file)[1], os.path.split(file)[0], db, dl_dir, keep_subdirs, overwrite) for file in files]
+    dl_inputs = [(os.path.split(file)[1], os.path.split(file)[0], db, dl_dir, keep_subdirs, overwrite) for file in files]
 
     # Make any required local directories
-    make_local_dirs(dl_dir, dlinputs, keep_subdirs)
+    make_local_dirs(dl_dir, dl_inputs, keep_subdirs)
 
     print('Downloading files...')
     # Create multiple processes to download files.
     # Limit to 2 connections to avoid overloading the server
     pool = multiprocessing.Pool(processes=2)
-    pool.map(dl_pb_file, dlinputs)
+    pool.map(dl_pb_file, dl_inputs)
     print('Finished downloading files')
 
     return
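Putting it together, an end-to-end example of the public entry point with the renamed dl_inputs flowing through make_local_dirs and dl_pb_file (database and file names are illustrative):

# Download two example files into ./mitdb-data, keeping subdirectory
# structure and leaving any already-complete local copies alone.
dl_files('mitdb', 'mitdb-data', ['100.dat', '100.hea'],
         keep_subdirs=True, overwrite=False)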