Skip to content

Commit 58e31d0

Browse files
committed
RF: Simplify copyfiles, updating logic for links
1 parent 29aa2ba commit 58e31d0

File tree

1 file changed

+76
-74
lines changed

1 file changed

+76
-74
lines changed

nipype/utils/filemanip.py

Lines changed: 76 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import os
1616
import re
1717
import shutil
18+
import posixpath
1819

1920
import numpy as np
2021

@@ -31,31 +32,6 @@ class FileNotFoundError(Exception):
3132
pass
3233

3334

34-
def nipype_hardlink_wrapper(raw_src, raw_dst):
    """Hard-link ``raw_src`` to ``raw_dst``, falling back to a file copy.

    The intent is to avoid unnecessary duplication of large files when
    using a DataSink. Hard links are not supported on all file systems or
    OS environments, and will not succeed if the source and destination
    are not on the same partition. If the hard link cannot be created, a
    standard copy is made instead.

    Parameters
    ----------
    raw_src : str
        path of the existing file; symlinks are resolved before linking
    raw_dst : str
        path to create; whatever already sits at this path (including a
        dangling symlink) is removed first

    Returns
    -------
    None
    """
    # Use realpath to avoid hardlinking symlinks
    src = os.path.realpath(raw_src)
    # Use normpath, in case destination is a symlink
    dst = os.path.normpath(raw_dst)
    # lexists rather than exists: a dangling symlink at dst must be removed
    # too, otherwise the fallback copy would write through it and create a
    # stray file at the symlink's target.
    if src != dst and os.path.lexists(dst):
        os.unlink(dst)  # First remove destination
    try:
        os.link(src, dst)  # Reference same inode to avoid duplication
    except (OSError, AttributeError, NotImplementedError):
        # OSError: cross-device link, unsupported file system, permissions.
        # AttributeError / NotImplementedError: platforms without os.link.
        shutil.copyfile(src, dst)  # Fall back to traditional copy
57-
58-
5935
def split_filename(fname):
6036
"""Split a filename into parts: path, base filename and extension.
6137
@@ -201,7 +177,13 @@ def hash_timestamp(afile):
201177

202178
def copyfile(originalfile, newfile, copy=False, create_new=False,
203179
hashmethod=None, use_hardlink=False):
204-
"""Copy or symlink ``originalfile`` to ``newfile``.
180+
"""Copy or link ``originalfile`` to ``newfile``.
181+
182+
If ``use_hardlink`` is True, and the file can be hard-linked, then a
183+
link is created, instead of copying the file.
184+
185+
If a hard link is not created and ``copy`` is False, then a symbolic
186+
link is created.
205187
206188
Parameters
207189
----------
@@ -212,6 +194,9 @@ def copyfile(originalfile, newfile, copy=False, create_new=False,
212194
copy : Bool
213195
specifies whether to copy or symlink files
214196
(default=False) but only for POSIX systems
197+
use_hardlink : Bool
198+
specifies whether to hard-link files, when able
199+
(Default=False), taking precedence over copy
215200
216201
Returns
217202
-------
@@ -237,67 +222,84 @@ def copyfile(originalfile, newfile, copy=False, create_new=False,
237222
if hashmethod is None:
238223
hashmethod = config.get('execution', 'hash_method').lower()
239224

240-
elif os.path.exists(newfile):
241-
if hashmethod == 'timestamp':
242-
newhash = hash_timestamp(newfile)
243-
elif hashmethod == 'content':
244-
newhash = hash_infile(newfile)
245-
fmlogger.debug("File: %s already exists,%s, copy:%d"
246-
% (newfile, newhash, copy))
247-
# the following seems unnecessary
248-
# if os.name is 'posix' and copy:
249-
# if os.path.lexists(newfile) and os.path.islink(newfile):
250-
# os.unlink(newfile)
251-
# newhash = None
252-
if os.name is 'posix' and not copy:
253-
if os.path.lexists(newfile):
254-
if hashmethod == 'timestamp':
255-
orighash = hash_timestamp(originalfile)
256-
elif hashmethod == 'content':
257-
orighash = hash_infile(originalfile)
258-
fmlogger.debug('Original hash: %s, %s' % (originalfile, orighash))
259-
if newhash != orighash:
260-
os.unlink(newfile)
261-
if (newhash is None) or (newhash != orighash):
262-
try:
263-
os.symlink(originalfile, newfile)
264-
except OSError:
265-
return copyfile(originalfile, newfile, True, create_new,
266-
hashmethod, use_hardlink)
267-
else:
268-
if newhash:
225+
# Existing file
226+
# -------------
227+
# Options:
228+
# symlink
229+
# to originalfile (keep if not (use_hardlink or copy))
230+
# to other file (unlink)
231+
# regular file
232+
# hard link to originalfile (keep)
233+
# copy of file (same hash) (keep)
234+
# different file (diff hash) (unlink)
235+
keep = False
236+
if os.path.lexists(newfile):
237+
if os.path.islink(newfile):
238+
if all(os.path.readlink(newfile) == originalfile, not use_hardlink,
239+
not copy):
240+
keep = True
241+
elif posixpath.samefile(newfile, originalfile):
242+
keep = True
243+
else:
269244
if hashmethod == 'timestamp':
270-
orighash = hash_timestamp(originalfile)
245+
hashfn = hash_timestamp
271246
elif hashmethod == 'content':
272-
orighash = hash_infile(originalfile)
273-
if (newhash is None) or (newhash != orighash):
274-
try:
275-
fmlogger.debug("Copying File: %s->%s" %
276-
(newfile, originalfile))
277-
if use_hardlink:
278-
nipype_hardlink_wrapper(originalfile, newfile)
279-
else:
280-
shutil.copyfile(originalfile, newfile)
281-
except shutil.Error as e:
282-
fmlogger.warn(e.message)
283-
else:
247+
hashfn = hash_infile
248+
newhash = hashfn(newfile)
249+
fmlogger.debug("File: %s already exists,%s, copy:%d"
250+
% (newfile, newhash, copy))
251+
orighash = hashfn(originalfile)
252+
keep = hashfn(newfile) == hashfn(originalfile)
253+
if keep:
284254
fmlogger.debug("File: %s already exists, not overwriting, copy:%d"
285255
% (newfile, copy))
256+
else:
257+
os.unlink(newfile)
258+
259+
# New file
260+
# --------
261+
# use_hardlink & can_hardlink => hardlink
262+
# ~hardlink & ~copy & can_symlink => symlink
263+
# ~hardlink & ~symlink => copy
264+
if not keep and use_hardlink:
265+
try:
266+
fmlogger.debug("Linking File: %s->%s" % (newfile, originalfile))
267+
# Use realpath to avoid hardlinking symlinks
268+
os.link(os.path.realpath(originalfile), newfile)
269+
except OSError:
270+
use_hardlink=False # Disable for associated files
271+
else:
272+
keep = True
273+
274+
if not keep and not copy and os.name == 'posix':
275+
try:
276+
fmlogger.debug("Symlinking File: %s->%s" % (newfile, originalfile))
277+
os.symlink(originalfile, newfile)
278+
except OSError:
279+
copy = True # Disable for associated files
280+
else:
281+
keep = True
282+
283+
if not keep:
284+
try:
285+
fmlogger.debug("Copying File: %s->%s" % (newfile, originalfile))
286+
shutil.copyfile(originalfile, newfile)
287+
except shutil.Error as e:
288+
fmlogger.warn(e.message)
289+
290+
# Associated files
286291
if originalfile.endswith(".img"):
287292
hdrofile = originalfile[:-4] + ".hdr"
288293
hdrnfile = newfile[:-4] + ".hdr"
289294
matofile = originalfile[:-4] + ".mat"
290295
if os.path.exists(matofile):
291296
matnfile = newfile[:-4] + ".mat"
292-
copyfile(matofile, matnfile, copy, create_new, hashmethod,
293-
use_hardlink)
294-
copyfile(hdrofile, hdrnfile, copy, create_new, hashmethod,
295-
use_hardlink)
297+
copyfile(matofile, matnfile, copy, use_hardlink=use_hardlink)
298+
copyfile(hdrofile, hdrnfile, copy, use_hardlink=use_hardlink)
296299
elif originalfile.endswith(".BRIK"):
297300
hdrofile = originalfile[:-5] + ".HEAD"
298301
hdrnfile = newfile[:-5] + ".HEAD"
299-
copyfile(hdrofile, hdrnfile, copy, create_new, hashmethod,
300-
use_hardlink)
302+
copyfile(hdrofile, hdrnfile, copy, use_hardlink=use_hardlink)
301303

302304
return newfile
303305

0 commit comments

Comments
 (0)