Skip to content

Commit b6dfaa1

Browse files
committed
WIP: use pooch [ci skip]
1 parent b44917e commit b6dfaa1

File tree

1 file changed

+98
-88
lines changed

1 file changed

+98
-88
lines changed

mne/datasets/utils.py

Lines changed: 98 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,9 @@
1414
import sys
1515
import zipfile
1616
import tempfile
17+
import pkg_resources
1718
from distutils.version import LooseVersion
19+
import pooch
1820

1921
import numpy as np
2022

@@ -222,87 +224,119 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
222224
name=None, check_version=False, return_version=False,
223225
archive_name=None, accept=False):
224226
"""Aux function."""
225-
key = {
226-
'fake': 'MNE_DATASETS_FAKE_PATH',
227-
'misc': 'MNE_DATASETS_MISC_PATH',
228-
'sample': 'MNE_DATASETS_SAMPLE_PATH',
229-
'spm': 'MNE_DATASETS_SPM_FACE_PATH',
230-
'somato': 'MNE_DATASETS_SOMATO_PATH',
231-
'brainstorm': 'MNE_DATASETS_BRAINSTORM_PATH',
232-
'testing': 'MNE_DATASETS_TESTING_PATH',
233-
'multimodal': 'MNE_DATASETS_MULTIMODAL_PATH',
234-
'fnirs_motor': 'MNE_DATASETS_FNIRS_MOTOR_PATH',
235-
'opm': 'MNE_DATASETS_OPM_PATH',
236-
'visual_92_categories': 'MNE_DATASETS_VISUAL_92_CATEGORIES_PATH',
237-
'kiloword': 'MNE_DATASETS_KILOWORD_PATH',
238-
'mtrf': 'MNE_DATASETS_MTRF_PATH',
239-
'fieldtrip_cmc': 'MNE_DATASETS_FIELDTRIP_CMC_PATH',
240-
'phantom_4dbti': 'MNE_DATASETS_PHANTOM_4DBTI_PATH',
241-
'limo': 'MNE_DATASETS_LIMO_PATH',
242-
'refmeg_noise': 'MNE_DATASETS_REFMEG_NOISE_PATH',
243-
}[name]
244-
245-
path = _get_path(path, key, name)
246-
# To update the testing or misc dataset, push commits, then make a new
247-
# release on GitHub. Then update the "releases" variable:
227+
# To update the testing or misc datasets, push or merge commits to their
228+
# respective repos, and make a new release of the dataset on GitHub.
229+
# Then update the checksum in `mne/data/dataset_checksums.txt`,
230+
# and change this "releases" variable:
248231
releases = dict(testing='0.112', misc='0.7')
249-
# And also update the "md5_hashes['testing']" variable below.
250-
# To update any other dataset, update the data archive itself (upload
251-
# an updated version) and update the md5 hash.
252-
253-
# try to match url->archive_name->folder_name
254-
urls = dict( # the URLs to use
255-
brainstorm=dict(
256-
bst_auditory='https://osf.io/5t9n8/download?version=1',
257-
bst_phantom_ctf='https://osf.io/sxr8y/download?version=1',
258-
bst_phantom_elekta='https://osf.io/dpcku/download?version=1',
259-
bst_raw='https://osf.io/9675n/download?version=2',
260-
bst_resting='https://osf.io/m7bd3/download?version=3'),
261-
fake='https://github.com/mne-tools/mne-testing-data/raw/master/'
262-
'datasets/foo.tgz',
263-
misc='https://codeload.github.com/mne-tools/mne-misc-data/'
264-
'tar.gz/%s' % releases['misc'],
232+
# To update any other dataset besides `testing` or `misc`, upload the new
233+
# version of the data archive itself (e.g., to osf.io) and then update the
234+
# corresponding checksum in `mne/data/dataset_checksums.txt`.
235+
testing_data_name = f'mne-testing-data-{releases["testing"]}'
236+
misc_data_name = f'mne-misc-data-{releases["misc"]}'
237+
238+
config_keys = dict(
239+
fake='MNE_DATASETS_FAKE_PATH',
240+
misc='MNE_DATASETS_MISC_PATH',
241+
sample='MNE_DATASETS_SAMPLE_PATH',
242+
spm='MNE_DATASETS_SPM_FACE_PATH',
243+
somato='MNE_DATASETS_SOMATO_PATH',
244+
brainstorm='MNE_DATASETS_BRAINSTORM_PATH',
245+
testing='MNE_DATASETS_TESTING_PATH',
246+
multimodal='MNE_DATASETS_MULTIMODAL_PATH',
247+
fnirs_motor='MNE_DATASETS_FNIRS_MOTOR_PATH',
248+
opm='MNE_DATASETS_OPM_PATH',
249+
visual_92_categories='MNE_DATASETS_VISUAL_92_CATEGORIES_PATH',
250+
kiloword='MNE_DATASETS_KILOWORD_PATH',
251+
mtrf='MNE_DATASETS_MTRF_PATH',
252+
fieldtrip_cmc='MNE_DATASETS_FIELDTRIP_CMC_PATH',
253+
phantom_4dbti='MNE_DATASETS_PHANTOM_4DBTI_PATH',
254+
limo='MNE_DATASETS_LIMO_PATH',
255+
refmeg_noise='MNE_DATASETS_REFMEG_NOISE_PATH',
256+
)
257+
path = _get_path(path, config_keys[name], name)
258+
259+
# the download URLs
260+
urls = dict(
261+
bst_auditory='https://osf.io/5t9n8/download?version=1',
262+
bst_phantom_ctf='https://osf.io/sxr8y/download?version=1',
263+
bst_phantom_elekta='https://osf.io/dpcku/download?version=1',
264+
bst_raw='https://osf.io/9675n/download?version=2',
265+
bst_resting='https://osf.io/m7bd3/download?version=3',
266+
fnirs_motor='https://osf.io/dj3eh/download?version=1',
267+
kiloword='https://osf.io/qkvf9/download?version=1',
268+
multimodal='https://ndownloader.figshare.com/files/5999598',
269+
opm='https://osf.io/p6ae7/download?version=2',
270+
phantom_4dbti='https://osf.io/v2brw/download?version=2',
265271
sample='https://osf.io/86qa2/download?version=5',
266272
somato='https://osf.io/tp4sg/download?version=7',
267273
spm='https://osf.io/je4s8/download?version=2',
268-
testing='https://codeload.github.com/mne-tools/mne-testing-data/'
269-
'tar.gz/%s' % releases['testing'],
270-
multimodal='https://ndownloader.figshare.com/files/5999598',
271-
fnirs_motor='https://osf.io/dj3eh/download?version=1',
272-
opm='https://osf.io/p6ae7/download?version=2',
273-
visual_92_categories=[
274-
'https://osf.io/8ejrs/download?version=1',
275-
'https://osf.io/t4yjp/download?version=1'],
274+
visual_92_categories_1='https://osf.io/8ejrs/download?version=1',
275+
visual_92_categories_2='https://osf.io/t4yjp/download?version=1',
276276
mtrf='https://osf.io/h85s2/download?version=1',
277-
kiloword='https://osf.io/qkvf9/download?version=1',
278-
fieldtrip_cmc='https://osf.io/j9b6s/download?version=1',
279-
phantom_4dbti='https://osf.io/v2brw/download?version=2',
280277
refmeg_noise='https://osf.io/drt6v/download?version=1',
278+
fieldtrip_cmc='https://osf.io/j9b6s/download?version=1',
279+
fake=('https://github.com/mne-tools/mne-testing-data/raw/master/'
280+
'datasets/foo.tgz'),
281+
misc=('https://codeload.github.com/mne-tools/mne-misc-data/tar.gz/'
282+
f'{releases["misc"]}'),
283+
testing=('https://codeload.github.com/mne-tools/mne-testing-data/'
284+
f'tar.gz/{releases["testing"]}'),
281285
)
282-
# filename of the resulting downloaded archive (only needed if the URL
283-
# name does not match resulting filename)
286+
# filename of the resulting downloaded archive
284287
archive_names = dict(
285-
fieldtrip_cmc='SubjectCMC.zip',
288+
bst_auditory='bst_auditory.tar.gz',
289+
bst_phantom_ctf='bst_phantom_ctf.tar.gz',
290+
bst_phantom_elekta='bst_phantom_elekta.tar.gz',
291+
bst_raw='bst_raw.tar.gz',
292+
bst_resting='bst_resting.tar.gz',
293+
fnirs_motor='MNE-fNIRS-motor-data.tgz',
286294
kiloword='MNE-kiloword-data.tar.gz',
287-
misc='mne-misc-data-%s.tar.gz' % releases['misc'],
288-
mtrf='mTRF_1.5.zip',
289295
multimodal='MNE-multimodal-data.tar.gz',
290-
fnirs_motor='MNE-fNIRS-motor-data.tgz',
291296
opm='MNE-OPM-data.tar.gz',
297+
phantom_4dbti='MNE-phantom-4DBTi.zip',
292298
sample='MNE-sample-data-processed.tar.gz',
293299
somato='MNE-somato-data.tar.gz',
294300
spm='MNE-spm-face.tar.gz',
295-
testing='mne-testing-data-%s.tar.gz' % releases['testing'],
296-
visual_92_categories=['MNE-visual_92_categories-data-part1.tar.gz',
297-
'MNE-visual_92_categories-data-part2.tar.gz'],
298-
phantom_4dbti='MNE-phantom-4DBTi.zip',
299-
refmeg_noise='sample_reference_MEG_noise-raw.zip'
301+
visual_92_categories_1='MNE-visual_92_categories-data-part1.tar.gz',
302+
visual_92_categories_2='MNE-visual_92_categories-data-part2.tar.gz',
303+
mtrf='mTRF_1.5.zip',
304+
refmeg_noise='sample_reference_MEG_noise-raw.zip',
305+
fieldtrip_cmc='SubjectCMC.zip',
306+
fake='foo.tgz',
307+
misc=f'{misc_data_name}.tar.gz',
308+
testing=f'{testing_data_name}.tar.gz',
309+
)
310+
assert set(archive_names.keys()) == set(urls.keys())
311+
# construct the mapping needed by pooch
312+
pooch_urls = {archive_names[key]: urls[key] for key in urls}
313+
# create the download manager
314+
fetcher = pooch.create(
315+
path=path,
316+
base_url='', # all URLs are given in the `urls` dict
317+
version=None, # because our data and code are in separate repos
318+
registry=None, # will load from file later
319+
urls=pooch_urls
300320
)
321+
# load the checksum registry
322+
registry = pkg_resources.resource_stream(
323+
'mne', op.join('data', 'dataset_checksums.txt'))
324+
fetcher.load_registry(registry)
325+
# update the keys that are versioned
326+
versioned_keys = {
327+
f'{testing_data_name}.tar.gz': fetcher.registry['mne-testing-data'],
328+
f'{misc_data_name}.tar.gz': fetcher.registry['mne-misc-data']}
329+
fetcher.registry.update(versioned_keys)
330+
for key in ('testing', 'misc'):
331+
del fetcher.registry[f'mne-{key}-data']
332+
333+
# TODO resume here
334+
301335
# original folder names that get extracted (only needed if the
302336
# archive does not extract the right folder name; e.g., usually GitHub)
303337
folder_origs = dict( # not listed means None (no need to move)
304-
misc='mne-misc-data-%s' % releases['misc'],
305-
testing='mne-testing-data-%s' % releases['testing'],
338+
misc=misc_data_name,
339+
testing=testing_data_name,
306340
)
307341
# finally, where we want them to extract to (only needed if the folder name
308342
# is not the same as the last bit of the archive name without the file
@@ -319,31 +353,7 @@ def _data_path(path=None, force_update=False, update_path=True, download=True,
319353
phantom_4dbti='MNE-phantom-4DBTi',
320354
refmeg_noise='MNE-refmeg-noise-data'
321355
)
322-
md5_hashes = dict(
323-
brainstorm=dict(
324-
bst_auditory='fa371a889a5688258896bfa29dd1700b',
325-
bst_phantom_ctf='80819cb7f5b92d1a5289db3fb6acb33c',
326-
bst_phantom_elekta='1badccbe17998d18cc373526e86a7aaf',
327-
bst_raw='fa2efaaec3f3d462b319bc24898f440c',
328-
bst_resting='70fc7bf9c3b97c4f2eab6260ee4a0430'),
329-
fake='3194e9f7b46039bb050a74f3e1ae9908',
330-
misc='2b2f2fec9d1197ed459117db1c6341ee',
331-
sample='12b75d1cb7df9dfb4ad73ed82f61094f',
332-
somato='32fd2f6c8c7eb0784a1de6435273c48b',
333-
spm='9f43f67150e3b694b523a21eb929ea75',
334-
testing='8eabd73532dd7df7c155983962c5b1fd',
335-
multimodal='26ec847ae9ab80f58f204d09e2c08367',
336-
fnirs_motor='c4935d19ddab35422a69f3326a01fef8',
337-
opm='370ad1dcfd5c47e029e692c85358a374',
338-
visual_92_categories=['74f50bbeb65740903eadc229c9fa759f',
339-
'203410a98afc9df9ae8ba9f933370e20'],
340-
kiloword='3a124170795abbd2e48aae8727e719a8',
341-
mtrf='273a390ebbc48da2c3184b01a82e4636',
342-
fieldtrip_cmc='6f9fd6520f9a66e20994423808d2528c',
343-
phantom_4dbti='938a601440f3ffa780d20a17bae039ff',
344-
refmeg_noise='779fecd890d98b73a4832e717d7c7c45'
345-
)
346-
assert set(md5_hashes.keys()) == set(urls.keys())
356+
347357
url = urls[name]
348358
hash_ = md5_hashes[name]
349359
folder_orig = folder_origs.get(name, None)

0 commit comments

Comments
 (0)