This repository has been archived by the owner on Jul 2, 2021. It is now read-only.

Lock dataset directory before extraction #788

Merged
merged 1 commit into from Feb 15, 2019
filelock
Hakuyume committed Feb 14, 2019
commit 04143b146fe6280266ad72865522dd5396583d73
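
Every file in this commit applies the same pattern: take a filelock.FileLock on a 'lock' file inside the dataset directory before the existence check, the download, and the extraction, so that ChainerMN workers sharing one filesystem cannot race on a half-extracted directory. Below is a minimal sketch of the pattern, assuming the py-filelock package; the prepare() function and the 'extracted' path are illustrative stand-ins for the per-dataset utilities, not ChainerCV API:

import os

import filelock


def prepare(data_root):
    # Serialize workers on a per-directory lock file: the first process
    # performs the extraction while the others block on the lock, then
    # observe the finished directory and return immediately.
    with filelock.FileLock(os.path.join(data_root, 'lock')):
        target = os.path.join(data_root, 'extracted')
        if os.path.exists(target):
            return target  # another worker already prepared the data
        os.makedirs(target)  # stand-in for cached_download + extractall
        return target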
3 changes: 1 addition & 2 deletions chainercv/datasets/ade20k/ade20k_utils.py
@@ -10,8 +10,7 @@
 
 def get_ade20k(root, url):
     data_root = download.get_dataset_directory(root)
-    # To support ChainerMN, target directory should be locked
-    # before extracting ADE20K.
+    # To support ChainerMN, the target directory should be locked.
     with filelock.FileLock(os.path.join(data_root, 'lock')):
         if os.path.exists(os.path.join(data_root, 'ADEChallengeData2016')):
             return data_root
5 changes: 2 additions & 3 deletions chainercv/datasets/camvid/camvid_dataset.py
@@ -1,8 +1,8 @@
+import filelock
 import glob
 import os
 import shutil
 
-import filelock
 import numpy as np
 
 from chainer.dataset import download
@@ -50,8 +50,7 @@
 
 def get_camvid():
     data_root = download.get_dataset_directory(root)
-    # To support ChainerMN, target directory should be locked
-    # before extracting CamVid.
+    # To support ChainerMN, the target directory should be locked.
     with filelock.FileLock(os.path.join(data_root, 'lock')):
         download_file_path = utils.cached_download(url)
         if len(glob.glob(os.path.join(data_root, '*'))) != 10:
61 changes: 32 additions & 29 deletions chainercv/datasets/coco/coco_utils.py
@@ -1,3 +1,4 @@
+import filelock
 import os
 
 from chainer.dataset import download
@@ -42,37 +43,39 @@
 
 def get_coco(split, img_split, year, mode):
     data_dir = download.get_dataset_directory(root)
-    annos_root = os.path.join(data_dir, 'annotations')
-    img_root = os.path.join(data_dir, 'images')
-    created_img_root = os.path.join(
-        img_root, '{}{}'.format(img_split, year))
-    img_url = img_urls[year][img_split]
-    if mode == 'instances':
-        anno_url = instances_anno_urls[year][split]
-        anno_path = os.path.join(
-            annos_root, 'instances_{}{}.json'.format(split, year))
-    elif mode == 'panoptic':
-        anno_url = panoptic_anno_url
-        anno_path = os.path.join(
-            annos_root, 'panoptic_{}{}.json'.format(split, year))
+    # To support ChainerMN, the target directory should be locked.
+    with filelock.FileLock(os.path.join(data_dir, 'lock')):
+        annos_root = os.path.join(data_dir, 'annotations')
+        img_root = os.path.join(data_dir, 'images')
+        created_img_root = os.path.join(
+            img_root, '{}{}'.format(img_split, year))
+        img_url = img_urls[year][img_split]
+        if mode == 'instances':
+            anno_url = instances_anno_urls[year][split]
+            anno_path = os.path.join(
+                annos_root, 'instances_{}{}.json'.format(split, year))
+        elif mode == 'panoptic':
+            anno_url = panoptic_anno_url
+            anno_path = os.path.join(
+                annos_root, 'panoptic_{}{}.json'.format(split, year))
 
-    if not os.path.exists(created_img_root):
-        download_file_path = utils.cached_download(img_url)
-        ext = os.path.splitext(img_url)[1]
-        utils.extractall(download_file_path, img_root, ext)
-    if not os.path.exists(anno_path):
-        download_file_path = utils.cached_download(anno_url)
-        ext = os.path.splitext(anno_url)[1]
-        if split in ['train', 'val']:
-            utils.extractall(download_file_path, data_dir, ext)
-        elif split in ['valminusminival', 'minival']:
-            utils.extractall(download_file_path, annos_root, ext)
+        if not os.path.exists(created_img_root):
+            download_file_path = utils.cached_download(img_url)
+            ext = os.path.splitext(img_url)[1]
+            utils.extractall(download_file_path, img_root, ext)
+        if not os.path.exists(anno_path):
+            download_file_path = utils.cached_download(anno_url)
+            ext = os.path.splitext(anno_url)[1]
+            if split in ['train', 'val']:
+                utils.extractall(download_file_path, data_dir, ext)
+            elif split in ['valminusminival', 'minival']:
+                utils.extractall(download_file_path, annos_root, ext)
 
-    if mode == 'panoptic':
-        pixelmap_path = os.path.join(
-            annos_root, 'panoptic_{}{}'.format(split, year))
-        if not os.path.exists(pixelmap_path):
-            utils.extractall(pixelmap_path + '.zip', annos_root, '.zip')
+        if mode == 'panoptic':
+            pixelmap_path = os.path.join(
+                annos_root, 'panoptic_{}{}'.format(split, year))
+            if not os.path.exists(pixelmap_path):
+                utils.extractall(pixelmap_path + '.zip', annos_root, '.zip')
     return data_dir
 
 
35 changes: 20 additions & 15 deletions chainercv/datasets/cub/cub_utils.py
@@ -1,3 +1,4 @@
+import filelock
 import numpy as np
 import os
 
@@ -16,28 +17,32 @@
 
 def get_cub():
     data_root = download.get_dataset_directory(root)
-    base_path = os.path.join(data_root, 'CUB_200_2011')
-    if os.path.exists(base_path):
-        # skip downloading
-        return base_path
+    # To support ChainerMN, the target directory should be locked.
+    with filelock.FileLock(os.path.join(data_root, 'lock')):
+        base_path = os.path.join(data_root, 'CUB_200_2011')
+        if os.path.exists(base_path):
+            # skip downloading
+            return base_path
 
-    download_file_path = utils.cached_download(url)
-    ext = os.path.splitext(url)[1]
-    utils.extractall(download_file_path, data_root, ext)
+        download_file_path = utils.cached_download(url)
+        ext = os.path.splitext(url)[1]
+        utils.extractall(download_file_path, data_root, ext)
     return base_path
 
 
 def get_cub_prob_map():
     data_root = download.get_dataset_directory(root)
-    base_path = os.path.join(data_root, 'segmentations')
-    if os.path.exists(base_path):
-        # skip downloading
-        return base_path
+    # To support ChainerMN, the target directory should be locked.
+    with filelock.FileLock(os.path.join(data_root, 'lock')):
+        base_path = os.path.join(data_root, 'segmentations')
+        if os.path.exists(base_path):
+            # skip downloading
+            return base_path
 
-    prob_map_download_file_path = utils.cached_download(prob_map_url)
-    prob_map_ext = os.path.splitext(prob_map_url)[1]
-    utils.extractall(
-        prob_map_download_file_path, data_root, prob_map_ext)
+        prob_map_download_file_path = utils.cached_download(prob_map_url)
+        prob_map_ext = os.path.splitext(prob_map_url)[1]
+        utils.extractall(
+            prob_map_download_file_path, data_root, prob_map_ext)
     return base_path
 
 
19 changes: 11 additions & 8 deletions chainercv/datasets/online_products/online_products_dataset.py
@@ -1,3 +1,4 @@
+import filelock
 import numpy as np
 import os
 
@@ -28,14 +29,16 @@
 
 def _get_online_products():
     data_root = download.get_dataset_directory(root)
-    base_path = os.path.join(data_root, 'Stanford_Online_Products')
-    if os.path.exists(base_path):
-        # skip downloading
-        return base_path
-
-    download_file_path = utils.cached_download(url)
-    ext = os.path.splitext(url)[1]
-    utils.extractall(download_file_path, data_root, ext)
+    # To support ChainerMN, the target directory should be locked.
+    with filelock.FileLock(os.path.join(data_root, 'lock')):
+        base_path = os.path.join(data_root, 'Stanford_Online_Products')
+        if os.path.exists(base_path):
+            # skip downloading
+            return base_path
+
+        download_file_path = utils.cached_download(url)
+        ext = os.path.splitext(url)[1]
+        utils.extractall(download_file_path, data_root, ext)
     return base_path
 
 
30 changes: 17 additions & 13 deletions chainercv/datasets/sbd/sbd_utils.py
@@ -1,3 +1,4 @@
+import filelock
 import os
 import six
 
@@ -30,19 +31,22 @@ def _generate_voc2012_txt(base_path):
 
 def get_sbd():
     data_root = download.get_dataset_directory(root)
-    base_path = os.path.join(data_root, 'benchmark_RELEASE/dataset')
-
-    train_voc2012_file = os.path.join(base_path, 'train_voc2012.txt')
-    if os.path.exists(train_voc2012_file):
-        # skip downloading
-        return base_path
-
-    download_file_path = utils.cached_download(url)
-    ext = os.path.splitext(url)[1]
-    utils.extractall(download_file_path, data_root, ext)
-
-    six.moves.urllib.request.urlretrieve(train_voc2012_url, train_voc2012_file)
-    _generate_voc2012_txt(base_path)
+    # To support ChainerMN, the target directory should be locked.
+    with filelock.FileLock(os.path.join(data_root, 'lock')):
+        base_path = os.path.join(data_root, 'benchmark_RELEASE/dataset')
+
+        train_voc2012_file = os.path.join(base_path, 'train_voc2012.txt')
+        if os.path.exists(train_voc2012_file):
+            # skip downloading
+            return base_path
+
+        download_file_path = utils.cached_download(url)
+        ext = os.path.splitext(url)[1]
+        utils.extractall(download_file_path, data_root, ext)
+
+        six.moves.urllib.request.urlretrieve(
+            train_voc2012_url, train_voc2012_file)
+        _generate_voc2012_txt(base_path)
 
     return base_path
 
22 changes: 13 additions & 9 deletions chainercv/datasets/voc/voc_utils.py
@@ -1,3 +1,4 @@
+import filelock
 import numpy as np
 import os
 
@@ -26,15 +27,18 @@ def get_voc(year, split):
         key = '2007_test'
 
     data_root = download.get_dataset_directory(root)
-    base_path = os.path.join(data_root, 'VOCdevkit/VOC{}'.format(year))
-    split_file = os.path.join(base_path, 'ImageSets/Main/{}.txt'.format(split))
-    if os.path.exists(split_file):
-        # skip downloading
-        return base_path
-
-    download_file_path = utils.cached_download(urls[key])
-    ext = os.path.splitext(urls[key])[1]
-    utils.extractall(download_file_path, data_root, ext)
+    # To support ChainerMN, the target directory should be locked.
+    with filelock.FileLock(os.path.join(data_root, 'lock')):
+        base_path = os.path.join(data_root, 'VOCdevkit/VOC{}'.format(year))
+        split_file = os.path.join(
+            base_path, 'ImageSets/Main/{}.txt'.format(split))
+        if os.path.exists(split_file):
+            # skip downloading
+            return base_path
+
+        download_file_path = utils.cached_download(urls[key])
+        ext = os.path.splitext(urls[key])[1]
+        utils.extractall(download_file_path, data_root, ext)
     return base_path
 
 
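
As a hedged illustration of why the existence check must sit inside the lock (this test is not part of the PR; prepare() mimics the pattern above with an illustrative 'extracted' path), spawning several processes against one directory should show exactly one of them performing the extraction:

import multiprocessing
import os
import tempfile

import filelock


def prepare(data_root, counter):
    with filelock.FileLock(os.path.join(data_root, 'lock')):
        target = os.path.join(data_root, 'extracted')
        if os.path.exists(target):
            return  # a sibling process finished first
        with counter.get_lock():
            counter.value += 1  # count actual extractions
        os.makedirs(target)


if __name__ == '__main__':
    data_root = tempfile.mkdtemp()
    counter = multiprocessing.Value('i', 0)
    procs = [multiprocessing.Process(target=prepare, args=(data_root, counter))
             for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    assert counter.value == 1  # exactly one process extracted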