Skip to content

Commit 00af979

Browse files
ambv and barneygale authored
[3.9] bpo-43757: Make pathlib use os.path.realpath() to resolve symlinks in a path (GH-25264) (GH-135035)
Also adds a new "strict" argument to realpath() to avoid changing the default behaviour of pathlib while sharing the implementation. (cherry-picked from commit baecfbd) Co-authored-by: Barney Gale <barney.gale@gmail.com>
1 parent 03ac445 commit 00af979

File tree

7 files changed

+192
-124
lines changed

7 files changed

+192
-124
lines changed

Doc/library/os.path.rst

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -345,22 +345,34 @@ the :mod:`glob` module.)
345345
Accepts a :term:`path-like object`.
346346

347347

348-
.. function:: realpath(path)
348+
.. function:: realpath(path, *, strict=False)
349349

350350
Return the canonical path of the specified filename, eliminating any symbolic
351351
links encountered in the path (if they are supported by the operating
352352
system).
353353

354+
If a path doesn't exist or a symlink loop is encountered, and *strict* is
355+
``True``, :exc:`OSError` is raised. If *strict* is ``False``, the path is
356+
resolved as far as possible and any remainder is appended without checking
357+
whether it exists.
358+
354359
.. note::
355-
When symbolic link cycles occur, the returned path will be one member of
356-
the cycle, but no guarantee is made about which member that will be.
360+
This function emulates the operating system's procedure for making a path
361+
canonical, which differs slightly between Windows and UNIX with respect
362+
to how links and subsequent path components interact.
363+
364+
Operating system APIs make paths canonical as needed, so it's not
365+
normally necessary to call this function.
357366

358367
.. versionchanged:: 3.6
359368
Accepts a :term:`path-like object`.
360369

361370
.. versionchanged:: 3.8
362371
Symbolic links and junctions are now resolved on Windows.
363372

373+
.. versionchanged:: 3.9.23
374+
The *strict* parameter was added.
375+
364376

365377
.. function:: relpath(path, start=os.curdir)
366378

Lib/ntpath.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -622,7 +622,7 @@ def _getfinalpathname_nonstrict(path):
622622
tail = join(name, tail) if tail else name
623623
return tail
624624

625-
def realpath(path):
625+
def realpath(path, *, strict=False):
626626
path = normpath(path)
627627
if isinstance(path, bytes):
628628
prefix = b'\\\\?\\'
@@ -647,6 +647,8 @@ def realpath(path):
647647
path = _getfinalpathname(path)
648648
initial_winerror = 0
649649
except OSError as ex:
650+
if strict:
651+
raise
650652
initial_winerror = ex.winerror
651653
path = _getfinalpathname_nonstrict(path)
652654
# The path returned by _getfinalpathname will always start with \\?\ -

Lib/pathlib.py

Lines changed: 38 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,6 @@
1414

1515

1616
supports_symlinks = True
17-
if os.name == 'nt':
18-
import nt
19-
if sys.getwindowsversion()[:2] >= (6, 0):
20-
from nt import _getfinalpathname
21-
else:
22-
supports_symlinks = False
23-
_getfinalpathname = None
24-
else:
25-
nt = None
2617

2718

2819
__all__ = [
@@ -34,14 +25,17 @@
3425
# Internals
3526
#
3627

28+
_WINERROR_NOT_READY = 21 # drive exists but is not accessible
29+
_WINERROR_INVALID_NAME = 123 # fix for bpo-35306
30+
_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself
31+
3732
# EBADF - guard against macOS `stat` throwing EBADF
3833
_IGNORED_ERROS = (ENOENT, ENOTDIR, EBADF, ELOOP)
3934

4035
_IGNORED_WINERRORS = (
41-
21, # ERROR_NOT_READY - drive exists but is not accessible
42-
123, # ERROR_INVALID_NAME - fix for bpo-35306
43-
1921, # ERROR_CANT_RESOLVE_FILENAME - fix for broken symlink pointing to itself
44-
)
36+
_WINERROR_NOT_READY,
37+
_WINERROR_INVALID_NAME,
38+
_WINERROR_CANT_RESOLVE_FILENAME)
4539

4640
def _ignore_error(exception):
4741
return (getattr(exception, 'errno', None) in _IGNORED_ERROS or
@@ -200,30 +194,6 @@ def casefold_parts(self, parts):
200194
def compile_pattern(self, pattern):
201195
return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch
202196

203-
def resolve(self, path, strict=False):
204-
s = str(path)
205-
if not s:
206-
return os.getcwd()
207-
previous_s = None
208-
if _getfinalpathname is not None:
209-
if strict:
210-
return self._ext_to_normal(_getfinalpathname(s))
211-
else:
212-
tail_parts = [] # End of the path after the first one not found
213-
while True:
214-
try:
215-
s = self._ext_to_normal(_getfinalpathname(s))
216-
except FileNotFoundError:
217-
previous_s = s
218-
s, tail = os.path.split(s)
219-
tail_parts.append(tail)
220-
if previous_s == s:
221-
return path
222-
else:
223-
return os.path.join(s, *reversed(tail_parts))
224-
# Means fallback on absolute
225-
return None
226-
227197
def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix):
228198
prefix = ''
229199
if s.startswith(ext_prefix):
@@ -234,10 +204,6 @@ def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix):
234204
s = '\\' + s[3:]
235205
return prefix, s
236206

237-
def _ext_to_normal(self, s):
238-
# Turn back an extended path into a normal DOS-like path
239-
return self._split_extended_path(s)[1]
240-
241207
def is_reserved(self, parts):
242208
# NOTE: the rules for reserved names seem somewhat complicated
243209
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
@@ -324,54 +290,6 @@ def casefold_parts(self, parts):
324290
def compile_pattern(self, pattern):
325291
return re.compile(fnmatch.translate(pattern)).fullmatch
326292

327-
def resolve(self, path, strict=False):
328-
sep = self.sep
329-
accessor = path._accessor
330-
seen = {}
331-
def _resolve(path, rest):
332-
if rest.startswith(sep):
333-
path = ''
334-
335-
for name in rest.split(sep):
336-
if not name or name == '.':
337-
# current dir
338-
continue
339-
if name == '..':
340-
# parent dir
341-
path, _, _ = path.rpartition(sep)
342-
continue
343-
if path.endswith(sep):
344-
newpath = path + name
345-
else:
346-
newpath = path + sep + name
347-
if newpath in seen:
348-
# Already seen this path
349-
path = seen[newpath]
350-
if path is not None:
351-
# use cached value
352-
continue
353-
# The symlink is not resolved, so we must have a symlink loop.
354-
raise RuntimeError("Symlink loop from %r" % newpath)
355-
# Resolve the symbolic link
356-
try:
357-
target = accessor.readlink(newpath)
358-
except OSError as e:
359-
if e.errno != EINVAL and strict:
360-
raise
361-
# Not a symlink, or non-strict mode. We just leave the path
362-
# untouched.
363-
path = newpath
364-
else:
365-
seen[newpath] = None # not resolved symlink
366-
path = _resolve(path, target)
367-
seen[newpath] = path # resolved symlink
368-
369-
return path
370-
# NOTE: according to POSIX, getcwd() cannot contain path components
371-
# which are symlinks.
372-
base = '' if path.is_absolute() else os.getcwd()
373-
return _resolve(base, str(path)) or sep
374-
375293
def is_reserved(self, parts):
376294
return False
377295

@@ -443,17 +361,11 @@ def link_to(self, target):
443361

444362
replace = os.replace
445363

446-
if nt:
447-
if supports_symlinks:
448-
symlink = os.symlink
449-
else:
450-
def symlink(a, b, target_is_directory):
451-
raise NotImplementedError("symlink() not available on this system")
364+
if hasattr(os, "symlink"):
365+
symlink = os.symlink
452366
else:
453-
# Under POSIX, os.symlink() takes two args
454-
@staticmethod
455-
def symlink(a, b, target_is_directory):
456-
return os.symlink(a, b)
367+
def symlink(self, src, dst, target_is_directory=False):
368+
raise NotImplementedError("os.symlink() not available on this system")
457369

458370
utime = os.utime
459371

@@ -475,6 +387,12 @@ def group(self, path):
475387
except ImportError:
476388
raise NotImplementedError("Path.group() is unsupported on this system")
477389

390+
getcwd = os.getcwd
391+
392+
expanduser = staticmethod(os.path.expanduser)
393+
394+
realpath = staticmethod(os.path.realpath)
395+
478396

479397
_normal_accessor = _NormalAccessor()
480398

@@ -1212,17 +1130,27 @@ def resolve(self, strict=False):
12121130
normalizing it (for example turning slashes into backslashes under
12131131
Windows).
12141132
"""
1215-
s = self._flavour.resolve(self, strict=strict)
1216-
if s is None:
1217-
# No symlink resolution => for consistency, raise an error if
1218-
# the path doesn't exist or is forbidden
1219-
self.stat()
1220-
s = str(self.absolute())
1221-
# Now we have no symlinks in the path, it's safe to normalize it.
1222-
normed = self._flavour.pathmod.normpath(s)
1223-
obj = self._from_parts((normed,), init=False)
1224-
obj._init(template=self)
1225-
return obj
1133+
1134+
def check_eloop(e):
1135+
winerror = getattr(e, 'winerror', 0)
1136+
if e.errno == ELOOP or winerror == _WINERROR_CANT_RESOLVE_FILENAME:
1137+
raise RuntimeError("Symlink loop from %r" % e.filename)
1138+
1139+
try:
1140+
s = self._accessor.realpath(self, strict=strict)
1141+
except OSError as e:
1142+
check_eloop(e)
1143+
raise
1144+
p = self._from_parts((s,))
1145+
1146+
# In non-strict mode, realpath() doesn't raise on symlink loops.
1147+
# Ensure we get an exception by calling stat()
1148+
if not strict:
1149+
try:
1150+
p.stat()
1151+
except OSError as e:
1152+
check_eloop(e)
1153+
return p
12261154

12271155
def stat(self):
12281156
"""

Lib/posixpath.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -385,16 +385,16 @@ def abspath(path):
385385
# Return a canonical path (i.e. the absolute location of a file on the
386386
# filesystem).
387387

388-
def realpath(filename):
388+
def realpath(filename, *, strict=False):
389389
"""Return the canonical path of the specified filename, eliminating any
390390
symbolic links encountered in the path."""
391391
filename = os.fspath(filename)
392-
path, ok = _joinrealpath(filename[:0], filename, {})
392+
path, ok = _joinrealpath(filename[:0], filename, strict, {})
393393
return abspath(path)
394394

395395
# Join two paths, normalizing and eliminating any symbolic links
396396
# encountered in the second path.
397-
def _joinrealpath(path, rest, seen):
397+
def _joinrealpath(path, rest, strict, seen):
398398
if isinstance(path, bytes):
399399
sep = b'/'
400400
curdir = b'.'
@@ -423,7 +423,15 @@ def _joinrealpath(path, rest, seen):
423423
path = pardir
424424
continue
425425
newpath = join(path, name)
426-
if not islink(newpath):
426+
try:
427+
st = os.lstat(newpath)
428+
except OSError:
429+
if strict:
430+
raise
431+
is_link = False
432+
else:
433+
is_link = stat.S_ISLNK(st.st_mode)
434+
if not is_link:
427435
path = newpath
428436
continue
429437
# Resolve the symbolic link
@@ -434,10 +442,14 @@ def _joinrealpath(path, rest, seen):
434442
# use cached value
435443
continue
436444
# The symlink is not resolved, so we must have a symlink loop.
437-
# Return already resolved part + rest of the path unchanged.
438-
return join(newpath, rest), False
445+
if strict:
446+
# Raise OSError(errno.ELOOP)
447+
os.stat(newpath)
448+
else:
449+
# Return already resolved part + rest of the path unchanged.
450+
return join(newpath, rest), False
439451
seen[newpath] = None # not resolved symlink
440-
path, ok = _joinrealpath(path, os.readlink(newpath), seen)
452+
path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
441453
if not ok:
442454
return join(path, rest), False
443455
seen[newpath] = path # resolved symlink

0 commit comments

Comments
 (0)