14
14
import sys
15
15
import zipfile
16
16
import tempfile
17
+ import pkg_resources
17
18
from distutils .version import LooseVersion
19
+ import pooch
18
20
19
21
import numpy as np
20
22
@@ -222,87 +224,119 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
222
224
name = None , check_version = False , return_version = False ,
223
225
archive_name = None , accept = False ):
224
226
"""Aux function."""
225
- key = {
226
- 'fake' : 'MNE_DATASETS_FAKE_PATH' ,
227
- 'misc' : 'MNE_DATASETS_MISC_PATH' ,
228
- 'sample' : 'MNE_DATASETS_SAMPLE_PATH' ,
229
- 'spm' : 'MNE_DATASETS_SPM_FACE_PATH' ,
230
- 'somato' : 'MNE_DATASETS_SOMATO_PATH' ,
231
- 'brainstorm' : 'MNE_DATASETS_BRAINSTORM_PATH' ,
232
- 'testing' : 'MNE_DATASETS_TESTING_PATH' ,
233
- 'multimodal' : 'MNE_DATASETS_MULTIMODAL_PATH' ,
234
- 'fnirs_motor' : 'MNE_DATASETS_FNIRS_MOTOR_PATH' ,
235
- 'opm' : 'MNE_DATASETS_OPM_PATH' ,
236
- 'visual_92_categories' : 'MNE_DATASETS_VISUAL_92_CATEGORIES_PATH' ,
237
- 'kiloword' : 'MNE_DATASETS_KILOWORD_PATH' ,
238
- 'mtrf' : 'MNE_DATASETS_MTRF_PATH' ,
239
- 'fieldtrip_cmc' : 'MNE_DATASETS_FIELDTRIP_CMC_PATH' ,
240
- 'phantom_4dbti' : 'MNE_DATASETS_PHANTOM_4DBTI_PATH' ,
241
- 'limo' : 'MNE_DATASETS_LIMO_PATH' ,
242
- 'refmeg_noise' : 'MNE_DATASETS_REFMEG_NOISE_PATH' ,
243
- }[name ]
244
-
245
- path = _get_path (path , key , name )
246
- # To update the testing or misc dataset, push commits, then make a new
247
- # release on GitHub. Then update the "releases" variable:
227
+ # To update the testing or misc datasets, push or merge commits to their
228
+ # respective repos, and make a new release of the dataset on GitHub.
229
+ # Then update the checksum in `mne/data/dataset_checksums.txt`,
230
+ # and change this "releases" variable:
248
231
releases = dict (testing = '0.112' , misc = '0.7' )
249
- # And also update the "md5_hashes['testing']" variable below.
250
- # To update any other dataset, update the data archive itself (upload
251
- # an updated version) and update the md5 hash.
252
-
253
- # try to match url->archive_name->folder_name
254
- urls = dict ( # the URLs to use
255
- brainstorm = dict (
256
- bst_auditory = 'https://osf.io/5t9n8/download?version=1' ,
257
- bst_phantom_ctf = 'https://osf.io/sxr8y/download?version=1' ,
258
- bst_phantom_elekta = 'https://osf.io/dpcku/download?version=1' ,
259
- bst_raw = 'https://osf.io/9675n/download?version=2' ,
260
- bst_resting = 'https://osf.io/m7bd3/download?version=3' ),
261
- fake = 'https://github.com/mne-tools/mne-testing-data/raw/master/'
262
- 'datasets/foo.tgz' ,
263
- misc = 'https://codeload.github.com/mne-tools/mne-misc-data/'
264
- 'tar.gz/%s' % releases ['misc' ],
232
+ # To update any other dataset besides `testing` or `misc`, upload the new
233
+ # version of the data archive itself (e.g., to osf.io) and then update the
234
+ # corresponding checksum in `mne/data/dataset_checksums.txt`.
235
+ testing_data_name = f'mne-testing-data-{ releases ["testing" ]} '
236
+ misc_data_name = f'mne-misc-data-{ releases ["misc" ]} '
237
+
238
+ config_keys = dict (
239
+ fake = 'MNE_DATASETS_FAKE_PATH' ,
240
+ misc = 'MNE_DATASETS_MISC_PATH' ,
241
+ sample = 'MNE_DATASETS_SAMPLE_PATH' ,
242
+ spm = 'MNE_DATASETS_SPM_FACE_PATH' ,
243
+ somato = 'MNE_DATASETS_SOMATO_PATH' ,
244
+ brainstorm = 'MNE_DATASETS_BRAINSTORM_PATH' ,
245
+ testing = 'MNE_DATASETS_TESTING_PATH' ,
246
+ multimodal = 'MNE_DATASETS_MULTIMODAL_PATH' ,
247
+ fnirs_motor = 'MNE_DATASETS_FNIRS_MOTOR_PATH' ,
248
+ opm = 'MNE_DATASETS_OPM_PATH' ,
249
+ visual_92_categories = 'MNE_DATASETS_VISUAL_92_CATEGORIES_PATH' ,
250
+ kiloword = 'MNE_DATASETS_KILOWORD_PATH' ,
251
+ mtrf = 'MNE_DATASETS_MTRF_PATH' ,
252
+ fieldtrip_cmc = 'MNE_DATASETS_FIELDTRIP_CMC_PATH' ,
253
+ phantom_4dbti = 'MNE_DATASETS_PHANTOM_4DBTI_PATH' ,
254
+ limo = 'MNE_DATASETS_LIMO_PATH' ,
255
+ refmeg_noise = 'MNE_DATASETS_REFMEG_NOISE_PATH' ,
256
+ )
257
+ path = _get_path (path , config_keys [name ], name )
258
+
259
+ # the download URLs
260
+ urls = dict (
261
+ bst_auditory = 'https://osf.io/5t9n8/download?version=1' ,
262
+ bst_phantom_ctf = 'https://osf.io/sxr8y/download?version=1' ,
263
+ bst_phantom_elekta = 'https://osf.io/dpcku/download?version=1' ,
264
+ bst_raw = 'https://osf.io/9675n/download?version=2' ,
265
+ bst_resting = 'https://osf.io/m7bd3/download?version=3' ,
266
+ fnirs_motor = 'https://osf.io/dj3eh/download?version=1' ,
267
+ kiloword = 'https://osf.io/qkvf9/download?version=1' ,
268
+ multimodal = 'https://ndownloader.figshare.com/files/5999598' ,
269
+ opm = 'https://osf.io/p6ae7/download?version=2' ,
270
+ phantom_4dbti = 'https://osf.io/v2brw/download?version=2' ,
265
271
sample = 'https://osf.io/86qa2/download?version=5' ,
266
272
somato = 'https://osf.io/tp4sg/download?version=7' ,
267
273
spm = 'https://osf.io/je4s8/download?version=2' ,
268
- testing = 'https://codeload.github.com/mne-tools/mne-testing-data/'
269
- 'tar.gz/%s' % releases ['testing' ],
270
- multimodal = 'https://ndownloader.figshare.com/files/5999598' ,
271
- fnirs_motor = 'https://osf.io/dj3eh/download?version=1' ,
272
- opm = 'https://osf.io/p6ae7/download?version=2' ,
273
- visual_92_categories = [
274
- 'https://osf.io/8ejrs/download?version=1' ,
275
- 'https://osf.io/t4yjp/download?version=1' ],
274
+ visual_92_categories_1 = 'https://osf.io/8ejrs/download?version=1' ,
275
+ visual_92_categories_2 = 'https://osf.io/t4yjp/download?version=1' ,
276
276
mtrf = 'https://osf.io/h85s2/download?version=1' ,
277
- kiloword = 'https://osf.io/qkvf9/download?version=1' ,
278
- fieldtrip_cmc = 'https://osf.io/j9b6s/download?version=1' ,
279
- phantom_4dbti = 'https://osf.io/v2brw/download?version=2' ,
280
277
refmeg_noise = 'https://osf.io/drt6v/download?version=1' ,
278
+ fieldtrip_cmc = 'https://osf.io/j9b6s/download?version=1' ,
279
+ fake = ('https://github.com/mne-tools/mne-testing-data/raw/master/'
280
+ 'datasets/foo.tgz' ),
281
+ misc = ('https://codeload.github.com/mne-tools/mne-misc-data/tar.gz/'
282
+ f'{ releases ["misc" ]} ' ),
283
+ testing = ('https://codeload.github.com/mne-tools/mne-testing-data/'
284
+ f'tar.gz/{ releases ["testing" ]} ' ),
281
285
)
282
- # filename of the resulting downloaded archive (only needed if the URL
283
- # name does not match resulting filename)
286
+ # filename of the resulting downloaded archive
284
287
archive_names = dict (
285
- fieldtrip_cmc = 'SubjectCMC.zip' ,
288
+ bst_auditory = 'bst_auditory.tar.gz' ,
289
+ bst_phantom_ctf = 'bst_phantom_ctf.tar.gz' ,
290
+ bst_phantom_elekta = 'bst_phantom_elekta.tar.gz' ,
291
+ bst_raw = 'bst_raw.tar.gz' ,
292
+ bst_resting = 'bst_resting.tar.gz' ,
293
+ fnirs_motor = 'MNE-fNIRS-motor-data.tgz' ,
286
294
kiloword = 'MNE-kiloword-data.tar.gz' ,
287
- misc = 'mne-misc-data-%s.tar.gz' % releases ['misc' ],
288
- mtrf = 'mTRF_1.5.zip' ,
289
295
multimodal = 'MNE-multimodal-data.tar.gz' ,
290
- fnirs_motor = 'MNE-fNIRS-motor-data.tgz' ,
291
296
opm = 'MNE-OPM-data.tar.gz' ,
297
+ phantom_4dbti = 'MNE-phantom-4DBTi.zip' ,
292
298
sample = 'MNE-sample-data-processed.tar.gz' ,
293
299
somato = 'MNE-somato-data.tar.gz' ,
294
300
spm = 'MNE-spm-face.tar.gz' ,
295
- testing = 'mne-testing-data-%s.tar.gz' % releases ['testing' ],
296
- visual_92_categories = ['MNE-visual_92_categories-data-part1.tar.gz' ,
297
- 'MNE-visual_92_categories-data-part2.tar.gz' ],
298
- phantom_4dbti = 'MNE-phantom-4DBTi.zip' ,
299
- refmeg_noise = 'sample_reference_MEG_noise-raw.zip'
301
+ visual_92_categories_1 = 'MNE-visual_92_categories-data-part1.tar.gz' ,
302
+ visual_92_categories_2 = 'MNE-visual_92_categories-data-part2.tar.gz' ,
303
+ mtrf = 'mTRF_1.5.zip' ,
304
+ refmeg_noise = 'sample_reference_MEG_noise-raw.zip' ,
305
+ fieldtrip_cmc = 'SubjectCMC.zip' ,
306
+ fake = 'foo.tgz' ,
307
+ misc = f'{ misc_data_name } .tar.gz' ,
308
+ testing = f'{ testing_data_name } .tar.gz' ,
309
+ )
310
+ assert set (archive_names .keys ()) == set (urls .keys ())
311
+ # construct the mapping needed by pooch
312
+ pooch_urls = {archive_names [key ]: urls [key ] for key in urls }
313
+ # create the download manager
314
+ fetcher = pooch .create (
315
+ path = path ,
316
+ base_url = '' , # all URLs are given in the `urls` dict
317
+ version = None , # because our data and code are in separate repos
318
+ registry = None , # will load from file later
319
+ urls = pooch_urls
300
320
)
321
+ # load the checksum registry
322
+ registry = pkg_resources .resource_stream (
323
+ 'mne' , op .join ('data' , 'dataset_checksums.txt' ))
324
+ fetcher .load_registry (registry )
325
+ # update the keys that are versioned
326
+ versioned_keys = {
327
+ f'{ testing_data_name } .tar.gz' : fetcher .registry ['mne-testing-data' ],
328
+ f'{ misc_data_name } .tar.gz' : fetcher .registry ['mne-misc-data' ]}
329
+ fetcher .registry .update (versioned_keys )
330
+ for key in ('testing' , 'misc' ):
331
+ del fetcher .registry [f'mne-{ key } -data' ]
332
+
333
+ # TODO: resume here; code below still uses the removed `md5_hashes`
334
+
301
335
# original folder names that get extracted (only needed if the
302
336
# archive does not extract the right folder name; e.g., usually GitHub)
303
337
folder_origs = dict ( # not listed means None (no need to move)
304
- misc = 'mne-misc-data-%s' % releases [ 'misc' ] ,
305
- testing = 'mne-testing-data-%s' % releases [ 'testing' ] ,
338
+ misc = misc_data_name ,
339
+ testing = testing_data_name ,
306
340
)
307
341
# finally, where we want them to extract to (only needed if the folder name
308
342
# is not the same as the last bit of the archive name without the file
@@ -319,31 +353,7 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
319
353
phantom_4dbti = 'MNE-phantom-4DBTi' ,
320
354
refmeg_noise = 'MNE-refmeg-noise-data'
321
355
)
322
- md5_hashes = dict (
323
- brainstorm = dict (
324
- bst_auditory = 'fa371a889a5688258896bfa29dd1700b' ,
325
- bst_phantom_ctf = '80819cb7f5b92d1a5289db3fb6acb33c' ,
326
- bst_phantom_elekta = '1badccbe17998d18cc373526e86a7aaf' ,
327
- bst_raw = 'fa2efaaec3f3d462b319bc24898f440c' ,
328
- bst_resting = '70fc7bf9c3b97c4f2eab6260ee4a0430' ),
329
- fake = '3194e9f7b46039bb050a74f3e1ae9908' ,
330
- misc = '2b2f2fec9d1197ed459117db1c6341ee' ,
331
- sample = '12b75d1cb7df9dfb4ad73ed82f61094f' ,
332
- somato = '32fd2f6c8c7eb0784a1de6435273c48b' ,
333
- spm = '9f43f67150e3b694b523a21eb929ea75' ,
334
- testing = '8eabd73532dd7df7c155983962c5b1fd' ,
335
- multimodal = '26ec847ae9ab80f58f204d09e2c08367' ,
336
- fnirs_motor = 'c4935d19ddab35422a69f3326a01fef8' ,
337
- opm = '370ad1dcfd5c47e029e692c85358a374' ,
338
- visual_92_categories = ['74f50bbeb65740903eadc229c9fa759f' ,
339
- '203410a98afc9df9ae8ba9f933370e20' ],
340
- kiloword = '3a124170795abbd2e48aae8727e719a8' ,
341
- mtrf = '273a390ebbc48da2c3184b01a82e4636' ,
342
- fieldtrip_cmc = '6f9fd6520f9a66e20994423808d2528c' ,
343
- phantom_4dbti = '938a601440f3ffa780d20a17bae039ff' ,
344
- refmeg_noise = '779fecd890d98b73a4832e717d7c7c45'
345
- )
346
- assert set (md5_hashes .keys ()) == set (urls .keys ())
356
+
347
357
url = urls [name ]
348
358
hash_ = md5_hashes [name ]
349
359
folder_orig = folder_origs .get (name , None )
0 commit comments