1414import sys
1515import zipfile
1616import tempfile
17+ import pkg_resources
1718from distutils .version import LooseVersion
19+ import pooch
1820
1921import numpy as np
2022
@@ -222,87 +224,119 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
222224 name = None , check_version = False , return_version = False ,
223225 archive_name = None , accept = False ):
224226 """Aux function."""
225- key = {
226- 'fake' : 'MNE_DATASETS_FAKE_PATH' ,
227- 'misc' : 'MNE_DATASETS_MISC_PATH' ,
228- 'sample' : 'MNE_DATASETS_SAMPLE_PATH' ,
229- 'spm' : 'MNE_DATASETS_SPM_FACE_PATH' ,
230- 'somato' : 'MNE_DATASETS_SOMATO_PATH' ,
231- 'brainstorm' : 'MNE_DATASETS_BRAINSTORM_PATH' ,
232- 'testing' : 'MNE_DATASETS_TESTING_PATH' ,
233- 'multimodal' : 'MNE_DATASETS_MULTIMODAL_PATH' ,
234- 'fnirs_motor' : 'MNE_DATASETS_FNIRS_MOTOR_PATH' ,
235- 'opm' : 'MNE_DATASETS_OPM_PATH' ,
236- 'visual_92_categories' : 'MNE_DATASETS_VISUAL_92_CATEGORIES_PATH' ,
237- 'kiloword' : 'MNE_DATASETS_KILOWORD_PATH' ,
238- 'mtrf' : 'MNE_DATASETS_MTRF_PATH' ,
239- 'fieldtrip_cmc' : 'MNE_DATASETS_FIELDTRIP_CMC_PATH' ,
240- 'phantom_4dbti' : 'MNE_DATASETS_PHANTOM_4DBTI_PATH' ,
241- 'limo' : 'MNE_DATASETS_LIMO_PATH' ,
242- 'refmeg_noise' : 'MNE_DATASETS_REFMEG_NOISE_PATH' ,
243- }[name ]
244-
245- path = _get_path (path , key , name )
246- # To update the testing or misc dataset, push commits, then make a new
247- # release on GitHub. Then update the "releases" variable:
227+ # To update the testing or misc datasets, push or merge commits to their
228+ # respective repos, and make a new release of the dataset on GitHub.
229+ # Then update the checksum in `mne/data/dataset_checksums.txt`,
230+ # and change this "releases" variable:
248231 releases = dict (testing = '0.112' , misc = '0.7' )
249- # And also update the "md5_hashes['testing']" variable below.
250- # To update any other dataset, update the data archive itself (upload
251- # an updated version) and update the md5 hash.
252-
253- # try to match url->archive_name->folder_name
254- urls = dict ( # the URLs to use
255- brainstorm = dict (
256- bst_auditory = 'https://osf.io/5t9n8/download?version=1' ,
257- bst_phantom_ctf = 'https://osf.io/sxr8y/download?version=1' ,
258- bst_phantom_elekta = 'https://osf.io/dpcku/download?version=1' ,
259- bst_raw = 'https://osf.io/9675n/download?version=2' ,
260- bst_resting = 'https://osf.io/m7bd3/download?version=3' ),
261- fake = 'https://github.com/mne-tools/mne-testing-data/raw/master/'
262- 'datasets/foo.tgz' ,
263- misc = 'https://codeload.github.com/mne-tools/mne-misc-data/'
264- 'tar.gz/%s' % releases ['misc' ],
232+ # To update any other dataset besides `testing` or `misc`, upload the new
233+ # version of the data archive itself (e.g., to osf.io) and then update the
234+ # corresponding checksum in `mne/data/dataset_checksums.txt`.
235+ testing_data_name = f'mne-testing-data-{ releases ["testing" ]} '
236+ misc_data_name = f'mne-misc-data-{ releases ["misc" ]} '
237+
238+ config_keys = dict (
239+ fake = 'MNE_DATASETS_FAKE_PATH' ,
240+ misc = 'MNE_DATASETS_MISC_PATH' ,
241+ sample = 'MNE_DATASETS_SAMPLE_PATH' ,
242+ spm = 'MNE_DATASETS_SPM_FACE_PATH' ,
243+ somato = 'MNE_DATASETS_SOMATO_PATH' ,
244+ brainstorm = 'MNE_DATASETS_BRAINSTORM_PATH' ,
245+ testing = 'MNE_DATASETS_TESTING_PATH' ,
246+ multimodal = 'MNE_DATASETS_MULTIMODAL_PATH' ,
247+ fnirs_motor = 'MNE_DATASETS_FNIRS_MOTOR_PATH' ,
248+ opm = 'MNE_DATASETS_OPM_PATH' ,
249+ visual_92_categories = 'MNE_DATASETS_VISUAL_92_CATEGORIES_PATH' ,
250+ kiloword = 'MNE_DATASETS_KILOWORD_PATH' ,
251+ mtrf = 'MNE_DATASETS_MTRF_PATH' ,
252+ fieldtrip_cmc = 'MNE_DATASETS_FIELDTRIP_CMC_PATH' ,
253+ phantom_4dbti = 'MNE_DATASETS_PHANTOM_4DBTI_PATH' ,
254+ limo = 'MNE_DATASETS_LIMO_PATH' ,
255+ refmeg_noise = 'MNE_DATASETS_REFMEG_NOISE_PATH' ,
256+ )
257+ path = _get_path (path , config_keys [name ], name )
258+
259+ # the download URLs
260+ urls = dict (
261+ bst_auditory = 'https://osf.io/5t9n8/download?version=1' ,
262+ bst_phantom_ctf = 'https://osf.io/sxr8y/download?version=1' ,
263+ bst_phantom_elekta = 'https://osf.io/dpcku/download?version=1' ,
264+ bst_raw = 'https://osf.io/9675n/download?version=2' ,
265+ bst_resting = 'https://osf.io/m7bd3/download?version=3' ,
266+ fnirs_motor = 'https://osf.io/dj3eh/download?version=1' ,
267+ kiloword = 'https://osf.io/qkvf9/download?version=1' ,
268+ multimodal = 'https://ndownloader.figshare.com/files/5999598' ,
269+ opm = 'https://osf.io/p6ae7/download?version=2' ,
270+ phantom_4dbti = 'https://osf.io/v2brw/download?version=2' ,
265271 sample = 'https://osf.io/86qa2/download?version=5' ,
266272 somato = 'https://osf.io/tp4sg/download?version=7' ,
267273 spm = 'https://osf.io/je4s8/download?version=2' ,
268- testing = 'https://codeload.github.com/mne-tools/mne-testing-data/'
269- 'tar.gz/%s' % releases ['testing' ],
270- multimodal = 'https://ndownloader.figshare.com/files/5999598' ,
271- fnirs_motor = 'https://osf.io/dj3eh/download?version=1' ,
272- opm = 'https://osf.io/p6ae7/download?version=2' ,
273- visual_92_categories = [
274- 'https://osf.io/8ejrs/download?version=1' ,
275- 'https://osf.io/t4yjp/download?version=1' ],
274+ visual_92_categories_1 = 'https://osf.io/8ejrs/download?version=1' ,
275+ visual_92_categories_2 = 'https://osf.io/t4yjp/download?version=1' ,
276276 mtrf = 'https://osf.io/h85s2/download?version=1' ,
277- kiloword = 'https://osf.io/qkvf9/download?version=1' ,
278- fieldtrip_cmc = 'https://osf.io/j9b6s/download?version=1' ,
279- phantom_4dbti = 'https://osf.io/v2brw/download?version=2' ,
280277 refmeg_noise = 'https://osf.io/drt6v/download?version=1' ,
278+ fieldtrip_cmc = 'https://osf.io/j9b6s/download?version=1' ,
279+ fake = ('https://github.com/mne-tools/mne-testing-data/raw/master/'
280+ 'datasets/foo.tgz' ),
281+ misc = ('https://codeload.github.com/mne-tools/mne-misc-data/tar.gz/'
282+ f'{ releases ["misc" ]} ' ),
283+ testing = ('https://codeload.github.com/mne-tools/mne-testing-data/'
284+ f'tar.gz/{ releases ["testing" ]} ' ),
281285 )
282- # filename of the resulting downloaded archive (only needed if the URL
283- # name does not match resulting filename)
286+ # filename of the resulting downloaded archive
284287 archive_names = dict (
285- fieldtrip_cmc = 'SubjectCMC.zip' ,
288+ bst_auditory = 'bst_auditory.tar.gz' ,
289+ bst_phantom_ctf = 'bst_phantom_ctf.tar.gz' ,
290+ bst_phantom_elekta = 'bst_phantom_elekta.tar.gz' ,
291+ bst_raw = 'bst_raw.tar.gz' ,
292+ bst_resting = 'bst_resting.tar.gz' ,
293+ fnirs_motor = 'MNE-fNIRS-motor-data.tgz' ,
286294 kiloword = 'MNE-kiloword-data.tar.gz' ,
287- misc = 'mne-misc-data-%s.tar.gz' % releases ['misc' ],
288- mtrf = 'mTRF_1.5.zip' ,
289295 multimodal = 'MNE-multimodal-data.tar.gz' ,
290- fnirs_motor = 'MNE-fNIRS-motor-data.tgz' ,
291296 opm = 'MNE-OPM-data.tar.gz' ,
297+ phantom_4dbti = 'MNE-phantom-4DBTi.zip' ,
292298 sample = 'MNE-sample-data-processed.tar.gz' ,
293299 somato = 'MNE-somato-data.tar.gz' ,
294300 spm = 'MNE-spm-face.tar.gz' ,
295- testing = 'mne-testing-data-%s.tar.gz' % releases ['testing' ],
296- visual_92_categories = ['MNE-visual_92_categories-data-part1.tar.gz' ,
297- 'MNE-visual_92_categories-data-part2.tar.gz' ],
298- phantom_4dbti = 'MNE-phantom-4DBTi.zip' ,
299- refmeg_noise = 'sample_reference_MEG_noise-raw.zip'
301+ visual_92_categories_1 = 'MNE-visual_92_categories-data-part1.tar.gz' ,
302+ visual_92_categories_2 = 'MNE-visual_92_categories-data-part2.tar.gz' ,
303+ mtrf = 'mTRF_1.5.zip' ,
304+ refmeg_noise = 'sample_reference_MEG_noise-raw.zip' ,
305+ fieldtrip_cmc = 'SubjectCMC.zip' ,
306+ fake = 'foo.tgz' ,
307+ misc = f'{ misc_data_name } .tar.gz' ,
308+ testing = f'{ testing_data_name } .tar.gz' ,
309+ )
310+ assert set (archive_names .keys ()) == set (urls .keys ())
311+ # construct the mapping needed by pooch
312+ pooch_urls = {archive_names [key ]: urls [key ] for key in urls }
313+ # create the download manager
314+ fetcher = pooch .create (
315+ path = path ,
316+ base_url = '' , # all URLs are given in the `urls` dict
317+ version = None , # because our data and code are in separate repos
318+ registry = None , # will load from file later
319+ urls = pooch_urls
300320 )
321+ # load the checksum registry
322+ registry = pkg_resources .resource_stream (
323+ 'mne' , op .join ('data' , 'dataset_checksums.txt' ))
324+ fetcher .load_registry (registry )
325+ # update the keys that are versioned
326+ versioned_keys = {
327+ f'{ testing_data_name } .tar.gz' : fetcher .registry ['mne-testing-data' ],
328+ f'{ misc_data_name } .tar.gz' : fetcher .registry ['mne-misc-data' ]}
329+ fetcher .registry .update (versioned_keys )
330+ for key in ('testing' , 'misc' ):
331+ del fetcher .registry [f'mne-{ key } -data' ]
332+
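333+ # TODO resume here: the code below still reads md5_hashes[name] (that dict was removed), and urls[name] has no 'brainstorm' / 'visual_92_categories' key anymore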
334+
301335 # original folder names that get extracted (only needed if the
302336 # archive does not extract the right folder name; e.g., usually GitHub)
303337 folder_origs = dict ( # not listed means None (no need to move)
304- misc = 'mne-misc-data-%s' % releases [ 'misc' ] ,
305- testing = 'mne-testing-data-%s' % releases [ 'testing' ] ,
338+ misc = misc_data_name ,
339+ testing = testing_data_name ,
306340 )
307341 # finally, where we want them to extract to (only needed if the folder name
308342 # is not the same as the last bit of the archive name without the file
@@ -319,31 +353,7 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
319353 phantom_4dbti = 'MNE-phantom-4DBTi' ,
320354 refmeg_noise = 'MNE-refmeg-noise-data'
321355 )
322- md5_hashes = dict (
323- brainstorm = dict (
324- bst_auditory = 'fa371a889a5688258896bfa29dd1700b' ,
325- bst_phantom_ctf = '80819cb7f5b92d1a5289db3fb6acb33c' ,
326- bst_phantom_elekta = '1badccbe17998d18cc373526e86a7aaf' ,
327- bst_raw = 'fa2efaaec3f3d462b319bc24898f440c' ,
328- bst_resting = '70fc7bf9c3b97c4f2eab6260ee4a0430' ),
329- fake = '3194e9f7b46039bb050a74f3e1ae9908' ,
330- misc = '2b2f2fec9d1197ed459117db1c6341ee' ,
331- sample = '12b75d1cb7df9dfb4ad73ed82f61094f' ,
332- somato = '32fd2f6c8c7eb0784a1de6435273c48b' ,
333- spm = '9f43f67150e3b694b523a21eb929ea75' ,
334- testing = '8eabd73532dd7df7c155983962c5b1fd' ,
335- multimodal = '26ec847ae9ab80f58f204d09e2c08367' ,
336- fnirs_motor = 'c4935d19ddab35422a69f3326a01fef8' ,
337- opm = '370ad1dcfd5c47e029e692c85358a374' ,
338- visual_92_categories = ['74f50bbeb65740903eadc229c9fa759f' ,
339- '203410a98afc9df9ae8ba9f933370e20' ],
340- kiloword = '3a124170795abbd2e48aae8727e719a8' ,
341- mtrf = '273a390ebbc48da2c3184b01a82e4636' ,
342- fieldtrip_cmc = '6f9fd6520f9a66e20994423808d2528c' ,
343- phantom_4dbti = '938a601440f3ffa780d20a17bae039ff' ,
344- refmeg_noise = '779fecd890d98b73a4832e717d7c7c45'
345- )
346- assert set (md5_hashes .keys ()) == set (urls .keys ())
356+
347357 url = urls [name ]
348358 hash_ = md5_hashes [name ]
349359 folder_orig = folder_origs .get (name , None )
0 commit comments