Skip to content

Commit 1c27198

Browse files
committed
Merge pull request #1161 from BRAINSia/TryHardLinks
ENH: Attempt to use hard links for data sink.
2 parents 67f2a59 + a71ee4c commit 1c27198

File tree

5 files changed

+64
-10
lines changed

5 files changed

+64
-10
lines changed

CHANGES

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
Next release
22
============
33

4+
* ENH: Attempt to use hard links for data sink.
5+
(https://github.com/nipy/nipype/pull/1161)
6+
* FIX: Updates to SGE Plugins
7+
(https://github.com/nipy/nipype/pull/1129)
8+
* ENH: Add ants JointFusion() node with testing
9+
(https://github.com/nipy/nipype/pull/1160)
10+
* ENH: Add --float option for antsRegistration calls
11+
(https://github.com/nipy/nipype/pull/1159)
412
* ENH: Added interface to simulate DWIs using the multi-tensor model
513
(https://github.com/nipy/nipype/pull/1085)
614
* ENH: New interface for FSL fslcpgeom utility (https://github.com/nipy/nipype/pull/1152)

doc/users/config_file.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,17 @@ Execution
8484
other nodes) will never be deleted independent of this parameter. (possible
8585
values: ``true`` and ``false``; default value: ``true``)
8686

87+
*try_hard_link_datasink*
88+
When the DataSink is used to produce an organized output file outside
89+
of nipype's internal cache structure, a file system hard link will be
90+
attempted first. A hard link allows multiple file paths to point to the
91+
same physical storage location on disk if the conditions allow. By
92+
referring to the same physical file on disk (instead of copying files
93+
byte-by-byte) we can avoid unnecessary data duplication. If hard links
94+
are not supported for the source or destination paths specified, then
95+
a standard byte-by-byte copy is used. (possible values: ``true`` and
96+
``false``; default value: ``true``)
97+
8798
*use_relative_paths*
8899
Should the paths stored in results (and used to look for inputs)
89100
be relative or absolute. Relative paths allow moving the whole

nipype/interfaces/io.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232
from nipype.utils.misc import human_order_sorted
3333
from nipype.external import six
3434

35+
from ..utils.misc import str2bool
36+
from .. import config
37+
3538
try:
3639
import pyxnat
3740
except:
@@ -53,7 +56,7 @@
5356
iflogger = logging.getLogger('interface')
5457

5558

56-
def copytree(src, dst):
59+
def copytree(src, dst, use_hardlink=False):
5760
"""Recursively copy a directory tree using
5861
nipype.utils.filemanip.copyfile()
5962
@@ -75,9 +78,10 @@ def copytree(src, dst):
7578
dstname = os.path.join(dst, name)
7679
try:
7780
if os.path.isdir(srcname):
78-
copytree(srcname, dstname)
81+
copytree(srcname, dstname, use_hardlink)
7982
else:
80-
copyfile(srcname, dstname, True, hashmethod='content')
83+
copyfile(srcname, dstname, True, hashmethod='content',
84+
use_hardlink=use_hardlink)
8185
except (IOError, os.error), why:
8286
errors.append((srcname, dstname, str(why)))
8387
# catch the Error from the recursive copytree so that we can
@@ -245,8 +249,8 @@ def __init__(self, infields=None, force_run=True, **kwargs):
245249
self._always_run = True
246250

247251
def _get_dst(self, src):
248-
## If path is directory with trailing os.path.sep,
249-
## then remove that for a more robust behavior
252+
# If path is directory with trailing os.path.sep,
253+
# then remove that for a more robust behavior
250254
src = src.rstrip(os.path.sep)
251255
path, fname = os.path.split(src)
252256
if self.inputs.parameterization:
@@ -306,6 +310,8 @@ def _list_outputs(self):
306310
pass
307311
else:
308312
raise(inst)
313+
use_hardlink = str2bool(config.get('execution',
314+
'try_hard_link_datasink') )
309315
for key, files in self.inputs._outputs.items():
310316
if not isdefined(files):
311317
continue
@@ -338,7 +344,8 @@ def _list_outputs(self):
338344
else:
339345
raise(inst)
340346
iflogger.debug("copyfile: %s %s" % (src, dst))
341-
copyfile(src, dst, copy=True, hashmethod='content')
347+
copyfile(src, dst, copy=True, hashmethod='content',
348+
use_hardlink=use_hardlink)
342349
out_files.append(dst)
343350
elif os.path.isdir(src):
344351
dst = self._get_dst(os.path.join(src, ''))
@@ -364,7 +371,7 @@ def _list_outputs(self):
364371
return outputs
365372

366373

367-
class DataGrabberInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): # InterfaceInputSpec):
374+
class DataGrabberInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
368375
base_directory = Directory(exists=True,
369376
desc='Path to the base directory consisting of subject data.')
370377
raise_on_empty = traits.Bool(True, usedefault=True,

nipype/utils/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
plugin = Linear
4444
remove_node_directories = false
4545
remove_unnecessary_outputs = true
46+
try_hard_link_datasink = true
4647
single_thread_matlab = true
4748
stop_on_first_crash = false
4849
stop_on_first_rerun = false

nipype/utils/filemanip.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,29 @@ class FileNotFoundError(Exception):
2828
pass
2929

3030

31+
def nipype_hardlink_wrapper(raw_src, raw_dst):
    """Attempt to use a hard link instead of a file copy.

    The intent is to avoid unnecessary duplication of large files when
    using a DataSink. Hard links are not supported on all file systems
    or OS environments, and will not succeed if the source and
    destination are not on the same partition. If the hard link cannot
    be created, fall back to a standard byte-by-byte copy.

    Parameters
    ----------
    raw_src : str
        Path of the existing file to link (or copy) from.
    raw_dst : str
        Path to create. If a file already exists at this path (and it is
        not the same normalized path as the source) it is removed first.

    Raises
    ------
    Any exception raised by ``shutil.copyfile`` when the fallback copy
    itself fails (for example, when the source does not exist or when
    source and destination are the same file).
    """
    src = os.path.normpath(raw_src)
    dst = os.path.normpath(raw_dst)
    # os.link refuses to overwrite, so clear the destination first.
    if src != dst and os.path.exists(dst):
        os.unlink(dst)
    try:
        # Reference the same inode to avoid duplicating the data.
        os.link(src, dst)
    except (OSError, AttributeError, NotImplementedError):
        # OSError: link not permitted (cross-device, unsupported file
        # system, ...). AttributeError/NotImplementedError: platform has
        # no os.link. Anything else (e.g. KeyboardInterrupt) propagates.
        shutil.copyfile(src, dst)
52+
53+
3154
def split_filename(fname):
3255
"""Split a filename into parts: path, base filename and extension.
3356
@@ -173,7 +196,7 @@ def hash_timestamp(afile):
173196

174197

175198
def copyfile(originalfile, newfile, copy=False, create_new=False,
176-
hashmethod=None):
199+
hashmethod=None, use_hardlink=False):
177200
"""Copy or symlink ``originalfile`` to ``newfile``.
178201
179202
Parameters
@@ -241,8 +264,12 @@ def copyfile(originalfile, newfile, copy=False, create_new=False,
241264
orighash = hash_infile(originalfile)
242265
if (newhash is None) or (newhash != orighash):
243266
try:
244-
fmlogger.debug("Copying File: %s->%s" % (newfile, originalfile))
245-
shutil.copyfile(originalfile, newfile)
267+
fmlogger.debug("Copying File: %s->%s" %
268+
(newfile, originalfile))
269+
if use_hardlink:
270+
nipype_hardlink_wrapper(originalfile, newfile)
271+
else:
272+
shutil.copyfile(originalfile, newfile)
246273
except shutil.Error, e:
247274
fmlogger.warn(e.message)
248275
else:

0 commit comments

Comments
 (0)