Skip to content

bpo-9949: Enable symlink traversal for ntpath.realpath #15287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Aug 21, 2019
Merged
10 changes: 9 additions & 1 deletion Doc/library/os.path.rst
Original file line number Diff line number Diff line change
Expand Up @@ -350,11 +350,19 @@ the :mod:`glob` module.)
.. function:: realpath(path)

Return the canonical path of the specified filename, eliminating any symbolic
links encountered in the path (if they are supported by the operating system).
links encountered in the path (if they are supported by the operating
system).

.. note::
When symbolic link cycles occur, the returned path will be one member of
the cycle, but no guarantee is made about which member that will be.

.. versionchanged:: 3.6
Accepts a :term:`path-like object`.

.. versionchanged:: 3.8
Symbolic links and junctions are now resolved on Windows.


.. function:: relpath(path, start=os.curdir)

Expand Down
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.8.rst
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,9 @@ characters or bytes unrepresentable at the OS level.
environment variable and does not use :envvar:`HOME`, which is not normally set
for regular user accounts.

:func:`~os.path.realpath` on Windows now resolves reparse points, including
symlinks and directory junctions.


ncurses
-------
Expand Down
107 changes: 88 additions & 19 deletions Lib/ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,94 @@ def abspath(path):
except (OSError, ValueError):
return _abspath_fallback(path)

# realpath is a no-op on systems without islink support
realpath = abspath
try:
from nt import _getfinalpathname, readlink as _nt_readlink
except ImportError:
# realpath is a no-op on systems without _getfinalpathname support.
realpath = abspath
else:
def _readlink_deep(path, seen=None):
    """Follow a chain of links starting at *path* and return the last path.

    Each step reads the link target with ``_nt_readlink``.  The walk stops
    when the current path was already visited (a cycle), when it does not
    exist, or when it is not a link that can be read.

    *seen* is an optional set of ``normcase``-d paths already visited.  It
    is updated in place, so a caller can share one set across several
    calls to detect cycles that span them.

    Raises OSError for any ``_nt_readlink`` failure whose ``winerror`` is
    not 2, 3 or 4390.
    """
    if seen is None:
        seen = set()

    while True:
        key = normcase(path)
        if key in seen:
            break
        seen.add(key)
        link = path
        try:
            target = _nt_readlink(link)
        except OSError as ex:
            # Stop on file (2) or directory (3) not found, or
            # paths that are not reparse points (4390)
            if ex.winerror in (2, 3, 4390):
                break
            raise
        except ValueError:
            # Stop on reparse points that are not symlinks
            break
        # Review feedback: a relative symlink must be evaluated relative to
        # its parent directory, not our current directory.  Anchor the
        # target at the directory that contains the link before continuing.
        if not isabs(target):
            target = normpath(join(dirname(link), target))
        # NOTE(review): a reviewer also pointed out that a junction found
        # under a remote (UNC) path targets devices local to the server
        # (its DOS drive / volume GUID names), which are meaningless or
        # misleading on this machine, so such a junction should be left
        # unresolved in the UNC path.  That case is not special-cased
        # here -- confirm the desired handling.
        path = target
    return path

def _getfinalpathname_nonstrict(path):
    """Resolve as much of *path* as possible and re-attach the rest.

    First tries ``_getfinalpathname`` on the whole path.  If that fails,
    trailing components are peeled off one at a time (following links via
    ``_readlink_deep``) until a prefix resolves; the peeled components are
    then joined back onto the resolved prefix.

    Raises OSError when resolution fails with a ``winerror`` outside the
    allowed set (2, 3, 123, 1921).
    """
    # Fast path to get the final path name. If this succeeds, there
    # is no need to go any further.
    try:
        return _getfinalpathname(path)
    except OSError:
        pass

    # Allow file (2) or directory (3) not found, invalid syntax (123),
    # and symlinks that cannot be followed (1921)
    allowed_winerror = 2, 3, 123, 1921

    # Non-strict algorithm is to find as much of the target directory
    # as we can and join the rest.
    # The tail must have the same type as the path: a str '' would make
    # ``path + tail`` raise TypeError for bytes paths and would make
    # ``abspath(tail)`` return str for an empty bytes input.
    tail = b'' if isinstance(path, bytes) else ''
    seen = set()
    while path:
        try:
            path = _readlink_deep(path, seen)
            path = _getfinalpathname(path)
            return join(path, tail) if tail else path
        except OSError as ex:
            if ex.winerror not in allowed_winerror:
                raise
            path, name = split(path)
            # Nothing left to split off: stop here and keep what we
            # have collected so far.
            if path and not name:
                return abspath(path + tail)
            tail = join(name, tail) if tail else name
    return abspath(tail)

def realpath(path):
    """Return the canonical form of *path* with links resolved.

    *path* may be str, bytes or a path-like object.  Resolution is
    non-strict: components that cannot be resolved are kept as given.

    The final path name normally carries the extended-length prefix; it
    is stripped unless the caller supplied it, or unless the stripped
    path would no longer resolve to the same final path.
    """
    path = os.fspath(path)
    if isinstance(path, bytes):
        prefix = b'\\\\?\\'
        unc_prefix = b'\\\\?\\UNC\\'
        new_unc_prefix = b'\\\\'
    else:
        prefix = '\\\\?\\'
        unc_prefix = '\\\\?\\UNC\\'
        new_unc_prefix = '\\\\'
    had_prefix = path.startswith(prefix)
    path = _getfinalpathname_nonstrict(path)
    # The path returned by _getfinalpathname will always start with \\?\ -
    # strip off that prefix unless it was already provided on the original
    # path.
    if had_prefix or not path.startswith(prefix):
        return path
    # For UNC paths, the prefix will actually be \\?\UNC\
    # Handle that case as well.
    if path.startswith(unc_prefix):
        spath = new_unc_prefix + path[len(unc_prefix):]
    else:
        spath = path[len(prefix):]
    # Ensure that the non-prefixed path resolves to the same path
    try:
        if _getfinalpathname(spath) == path:
            return spath
    except OSError:
        # Deliberate best effort: if the stripped path cannot be
        # resolved, keep the prefixed form.
        pass
    return path


# Win9x family and earlier have no Unicode filename support.
supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
sys.getwindowsversion()[3] >= 2)
Expand Down Expand Up @@ -633,23 +719,6 @@ def commonpath(paths):
raise


# Choose the helper used to decide whether two paths name the same file.
try:
    # The native helper is only used on Windows 6.0 and later.  Older
    # Windows versions are sent to the fallback by raising ImportError;
    # platforms without sys.getwindowsversion get there via
    # AttributeError.
    if sys.getwindowsversion()[:2] < (6, 0):
        raise ImportError
    from nt import _getfinalpathname
except (AttributeError, ImportError):
    def _getfinalpathname(f):
        """Approximate the final path name of *f*.

        Without the native helper, two files are treated as the same
        when their case-normalized absolute path names are equal.
        """
        return normcase(abspath(f))


try:
# The genericpath.isdir implementation uses os.stat and checks the mode
# attribute to tell whether or not the path is a directory.
Expand Down
198 changes: 195 additions & 3 deletions Lib/test/test_ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,22 @@
from test import support, test_genericpath
from tempfile import TemporaryFile


try:
import nt
except ImportError:
# Most tests can complete without the nt module,
# but for those that require it we import here.
nt = None

# True when this ntpath build exposes _getfinalpathname; the symlink
# tests are skipped otherwise.
HAVE_GETFINALPATHNAME = hasattr(ntpath, "_getfinalpathname")


def tester(fn, wantResult):
fn = fn.replace("\\", "\\\\")
gotResult = eval(fn)
Expand Down Expand Up @@ -194,6 +203,189 @@ def test_normpath(self):
tester("ntpath.normpath('\\\\.\\NUL')", r'\\.\NUL')
tester("ntpath.normpath('\\\\?\\D:/XY\\Z')", r'\\?\D:/XY\Z')

def test_realpath_curdir(self):
    """Every spelling of the current directory resolves to the cwd."""
    here = ntpath.normpath(os.getcwd())
    spellings = (
        "ntpath.realpath('.')",
        "ntpath.realpath('./.')",
        "ntpath.realpath('/'.join(['.'] * 100))",
        "ntpath.realpath('.\\.')",
        "ntpath.realpath('\\'.join(['.'] * 100))",
    )
    for expression in spellings:
        tester(expression, here)

def test_realpath_pardir(self):
    """Parent-directory references resolve upwards, stopping at the root."""
    here = ntpath.normpath(os.getcwd())
    parent = ntpath.dirname(here)
    grandparent = ntpath.dirname(parent)
    # Climbing far more levels than exist must end at the drive root.
    drive_root = ntpath.splitdrive(here)[0] + '\\'

    tester("ntpath.realpath('..')", parent)
    tester("ntpath.realpath('../..')", grandparent)
    tester("ntpath.realpath('/'.join(['..'] * 50))", drive_root)
    tester("ntpath.realpath('..\\..')", grandparent)
    tester("ntpath.realpath('\\'.join(['..'] * 50))", drive_root)

@support.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_basic(self):
    """A symlink to an existing file resolves to that file (str and bytes)."""
    target = ntpath.abspath(support.TESTFN)
    link = target + "1"
    # Create an empty target file.
    with open(target, "wb"):
        pass
    self.addCleanup(support.unlink, target)
    self.addCleanup(support.unlink, link)

    os.symlink(target, link)
    self.assertEqual(ntpath.realpath(link), target)
    self.assertEqual(ntpath.realpath(os.fsencode(link)),
                     os.fsencode(target))

@support.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_relative(self):
    """A symlink created through a relative link name still resolves."""
    target = ntpath.abspath(support.TESTFN)
    link = target + "1"
    # Create an empty target file.
    with open(target, "wb"):
        pass
    self.addCleanup(support.unlink, target)
    self.addCleanup(support.unlink, link)

    # The link is created via its relative name but queried via its
    # absolute name.
    os.symlink(target, ntpath.relpath(link))
    self.assertEqual(ntpath.realpath(link), target)

@support.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_broken_symlinks(self):
    """Links whose final target is missing resolve to the missing path."""
    root = ntpath.abspath(support.TESTFN)
    os.mkdir(root)
    self.addCleanup(support.rmtree, root)

    with support.change_cwd(root):
        # Directory links used as stepping stones by the broken links.
        os.mkdir("subdir")
        os.chdir("subdir")
        os.symlink(".", "recursive")
        os.symlink("..", "parent")
        os.chdir("..")
        os.symlink(".", "self")

        # Links that all eventually point at the non-existent "missing".
        broken_links = (
            ("missing", "broken"),
            (r"broken\bar", "broken1"),
            (r"self\self\broken", "broken2"),
            (r"subdir\parent\subdir\parent\broken", "broken3"),
            (root + r"\broken", "broken4"),
            (r"recursive\..\broken", "broken5"),
        )
        for link_target, link_name in broken_links:
            os.symlink(link_target, link_name)

        # (str query, bytes query, expected suffix below root)
        cases = (
            ("broken", b"broken", r"\missing"),
            (r"broken\foo", rb"broken\foo", r"\missing\foo"),
            (r"broken1", rb"broken1", r"\missing\bar"),
            (r"broken1\baz", rb"broken1\baz", r"\missing\bar\baz"),
            ("broken2", b"broken2", r"\missing"),
            ("broken3", rb"broken3", r"\missing"),
            ("broken4", b"broken4", r"\missing"),
            ("broken5", b"broken5", r"\missing"),
        )

        # All str queries first, then all bytes queries.
        for text_query, _, suffix in cases:
            self.assertEqual(ntpath.realpath(text_query), root + suffix)

        for _, bytes_query, suffix in cases:
            self.assertEqual(ntpath.realpath(bytes_query),
                             os.fsencode(root + suffix))

@support.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_symlink_loops(self):
    """Symlink cycles terminate and yield a member of the cycle."""
    # Bug #930024, return the path unchanged if we get into an infinite
    # symlink loop.
    base = ntpath.abspath(support.TESTFN)
    for suffix in ("", "1", "2", "y", "c", "a"):
        self.addCleanup(support.unlink, base + suffix)

    ext = "\\\\?\\"
    leaf = ntpath.basename(base)

    # A link that points at itself.
    os.symlink(base, base)
    self.assertEqual(ntpath.realpath(base), ext + base)

    # cycles are non-deterministic as to which path is returned, but
    # it will always be the fully resolved path of one member of the cycle
    os.symlink(base + "1", base + "2")
    os.symlink(base + "2", base + "1")
    expected = (ext + base + "1", ext + base + "2")
    self.assertIn(ntpath.realpath(base + "1"), expected)
    self.assertIn(ntpath.realpath(base + "2"), expected)

    self.assertIn(ntpath.realpath(base + "1\\x"),
                  (ntpath.join(r, "x") for r in expected))
    self.assertEqual(ntpath.realpath(base + "1\\.."),
                     ntpath.dirname(base))
    self.assertEqual(ntpath.realpath(base + "1\\..\\x"),
                     ntpath.dirname(ext + base) + "\\x")

    os.symlink(base + "x", base + "y")
    via_cycle = base + "1\\..\\" + leaf
    self.assertEqual(ntpath.realpath(via_cycle + "y"), ext + base + "x")
    self.assertIn(ntpath.realpath(via_cycle + "1"), expected)

    # A link whose relative target passes back through the link itself.
    os.symlink(leaf + "a\\b", base + "a")
    self.assertEqual(ntpath.realpath(base + "a"), ext + base + "a")

    # A link that reaches itself through its parent directory.
    parent_name = ntpath.basename(ntpath.dirname(base))
    os.symlink("..\\" + parent_name + "\\" + leaf + "c", base + "c")
    self.assertEqual(ntpath.realpath(base + "c"), ext + base + "c")

    # Test using relative path as well.
    self.assertEqual(ntpath.realpath(leaf), ext + base)

@support.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_symlink_prefix(self):
    """The extended-length prefix is kept only when it is needed."""
    ext = "\\\\?\\"
    base = ntpath.abspath(support.TESTFN)
    plain = base + "3"
    # The second file's name ends in a dot and is created and linked
    # through the prefixed form.
    dotted = ext + base + "3."
    plain_link = base + "3link"
    dotted_link = base + "3.link"
    for leftover in (plain, dotted, plain_link, dotted_link):
        self.addCleanup(support.unlink, leftover)

    with open(plain, "wb") as f:
        f.write(b'0')
    os.symlink(plain, plain_link)

    with open(dotted, "wb") as f:
        f.write(b'1')
    os.symlink(dotted, dotted_link)

    self.assertEqual(ntpath.realpath(plain_link), plain)
    self.assertEqual(ntpath.realpath(dotted_link), dotted)

    # Resolved paths should be usable to open target files
    with open(ntpath.realpath(plain_link), "rb") as f:
        self.assertEqual(f.read(), b'0')
    with open(ntpath.realpath(dotted_link), "rb") as f:
        self.assertEqual(f.read(), b'1')

    # When the prefix is included, it is not stripped
    self.assertEqual(ntpath.realpath(ext + plain_link), ext + plain)
    self.assertEqual(ntpath.realpath(ext + dotted_link), dotted)

def test_expandvars(self):
with support.EnvironmentVarGuard() as env:
env.clear()
Expand Down Expand Up @@ -288,11 +480,11 @@ def test_abspath(self):

def test_relpath(self):
tester('ntpath.relpath("a")', 'a')
tester('ntpath.relpath(os.path.abspath("a"))', 'a')
tester('ntpath.relpath(ntpath.abspath("a"))', 'a')
tester('ntpath.relpath("a/b")', 'a\\b')
tester('ntpath.relpath("../a/b")', '..\\a\\b')
with support.temp_cwd(support.TESTFN) as cwd_dir:
currentdir = os.path.basename(cwd_dir)
currentdir = ntpath.basename(cwd_dir)
tester('ntpath.relpath("a", "../b")', '..\\'+currentdir+'\\a')
tester('ntpath.relpath("a/b", "../c")', '..\\'+currentdir+'\\a\\b')
tester('ntpath.relpath("a", "b/c")', '..\\..\\a')
Expand Down Expand Up @@ -417,7 +609,7 @@ def test_ismount(self):
# locations below cannot then refer to mount points
#
drive, path = ntpath.splitdrive(sys.executable)
with support.change_cwd(os.path.dirname(sys.executable)):
with support.change_cwd(ntpath.dirname(sys.executable)):
self.assertFalse(ntpath.ismount(drive.lower()))
self.assertFalse(ntpath.ismount(drive.upper()))

Expand Down
5 changes: 1 addition & 4 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -3358,10 +3358,7 @@ def test_oserror_filename(self):
if hasattr(os, "lchmod"):
funcs.append((self.filenames, os.lchmod, 0o777))
if hasattr(os, "readlink"):
if sys.platform == "win32":
funcs.append((self.unicode_filenames, os.readlink,))
else:
funcs.append((self.filenames, os.readlink,))
funcs.append((self.filenames, os.readlink,))


for filenames, func, *func_args in funcs:
Expand Down
Loading