Skip to content

Commit e3b2c85

Browse files
gh-134873: Fix quadratic complexity in os.path.expandvars()
1 parent 8e8786f commit e3b2c85

File tree

5 files changed

+97
-116
lines changed

5 files changed

+97
-116
lines changed

Lib/ntpath.py

Lines changed: 41 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -400,17 +400,23 @@ def expanduser(path):
400400
# XXX With COMMAND.COM you can use any characters in a variable name,
401401
# XXX except '^|<>='.
402402

403+
_varpattern = r"'[^']*'?|%(%|[^%]*%?)|\$(\$|[-\w]+|\{[^}]*\}?)"
404+
_varsub = None
405+
_varsubb = None
406+
403407
def expandvars(path):
404408
"""Expand shell variables of the forms $var, ${var} and %var%.
405409
406410
Unknown variables are left unchanged."""
407411
path = os.fspath(path)
412+
global _varsub, _varsubb
408413
if isinstance(path, bytes):
409414
if b'$' not in path and b'%' not in path:
410415
return path
411-
import string
412-
varchars = bytes(string.ascii_letters + string.digits + '_-', 'ascii')
413-
quote = b'\''
416+
if not _varsubb:
417+
import re
418+
_varsubb = re.compile(_varpattern.encode(), re.ASCII).sub
419+
sub = _varsubb
414420
percent = b'%'
415421
brace = b'{'
416422
rbrace = b'}'
@@ -419,94 +425,44 @@ def expandvars(path):
419425
else:
420426
if '$' not in path and '%' not in path:
421427
return path
422-
import string
423-
varchars = string.ascii_letters + string.digits + '_-'
424-
quote = '\''
428+
if not _varsub:
429+
import re
430+
_varsub = re.compile(_varpattern, re.ASCII).sub
431+
sub = _varsub
425432
percent = '%'
426433
brace = '{'
427434
rbrace = '}'
428435
dollar = '$'
429436
environ = os.environ
430-
res = path[:0]
431-
index = 0
432-
pathlen = len(path)
433-
while index < pathlen:
434-
c = path[index:index+1]
435-
if c == quote: # no expansion within single quotes
436-
path = path[index + 1:]
437-
pathlen = len(path)
438-
try:
439-
index = path.index(c)
440-
res += c + path[:index + 1]
441-
except ValueError:
442-
res += c + path
443-
index = pathlen - 1
444-
elif c == percent: # variable or '%'
445-
if path[index + 1:index + 2] == percent:
446-
res += c
447-
index += 1
448-
else:
449-
path = path[index+1:]
450-
pathlen = len(path)
451-
try:
452-
index = path.index(percent)
453-
except ValueError:
454-
res += percent + path
455-
index = pathlen - 1
456-
else:
457-
var = path[:index]
458-
try:
459-
if environ is None:
460-
value = os.fsencode(os.environ[os.fsdecode(var)])
461-
else:
462-
value = environ[var]
463-
except KeyError:
464-
value = percent + var + percent
465-
res += value
466-
elif c == dollar: # variable or '$$'
467-
if path[index + 1:index + 2] == dollar:
468-
res += c
469-
index += 1
470-
elif path[index + 1:index + 2] == brace:
471-
path = path[index+2:]
472-
pathlen = len(path)
473-
try:
474-
index = path.index(rbrace)
475-
except ValueError:
476-
res += dollar + brace + path
477-
index = pathlen - 1
478-
else:
479-
var = path[:index]
480-
try:
481-
if environ is None:
482-
value = os.fsencode(os.environ[os.fsdecode(var)])
483-
else:
484-
value = environ[var]
485-
except KeyError:
486-
value = dollar + brace + var + rbrace
487-
res += value
488-
else:
489-
var = path[:0]
490-
index += 1
491-
c = path[index:index + 1]
492-
while c and c in varchars:
493-
var += c
494-
index += 1
495-
c = path[index:index + 1]
496-
try:
497-
if environ is None:
498-
value = os.fsencode(os.environ[os.fsdecode(var)])
499-
else:
500-
value = environ[var]
501-
except KeyError:
502-
value = dollar + var
503-
res += value
504-
if c:
505-
index -= 1
437+
438+
def repl(m):
439+
lastindex = m.lastindex
440+
if lastindex is None:
441+
return m[0]
442+
name = m[lastindex]
443+
if lastindex == 1:
444+
if name == percent:
445+
return name
446+
if not name.endswith(percent):
447+
return m[0]
448+
name = name[:-1]
506449
else:
507-
res += c
508-
index += 1
509-
return res
450+
if name == dollar:
451+
return name
452+
if name.startswith(brace):
453+
if not name.endswith(rbrace):
454+
return m[0]
455+
name = name[1:-1]
456+
457+
try:
458+
if environ is None:
459+
return os.fsencode(os.environ[os.fsdecode(name)])
460+
else:
461+
return environ[name]
462+
except KeyError:
463+
return m[0]
464+
465+
return sub(repl, path)
510466

511467

512468
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.

Lib/posixpath.py

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -284,56 +284,53 @@ def expanduser(path):
284284
# This expands the forms $variable and ${variable} only.
285285
# Non-existent variables are left unchanged.
286286

287-
_varprog = None
288-
_varprogb = None
287+
_varpattern = r'\$(\w+|\{[^}]*\}?)'
288+
_varsub = None
289+
_varsubb = None
289290

290291
def expandvars(path):
291292
"""Expand shell variables of form $var and ${var}. Unknown variables
292293
are left unchanged."""
293294
path = os.fspath(path)
294-
global _varprog, _varprogb
295+
global _varsub, _varsubb
295296
if isinstance(path, bytes):
296297
if b'$' not in path:
297298
return path
298-
if not _varprogb:
299+
if not _varsubb:
299300
import re
300-
_varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
301-
search = _varprogb.search
301+
_varsubb = re.compile(_varpattern.encode(), re.ASCII).sub
302+
sub = _varsubb
302303
start = b'{'
303304
end = b'}'
304305
environ = getattr(os, 'environb', None)
305306
else:
306307
if '$' not in path:
307308
return path
308-
if not _varprog:
309+
if not _varsub:
309310
import re
310-
_varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
311-
search = _varprog.search
311+
_varsub = re.compile(_varpattern, re.ASCII).sub
312+
sub = _varsub
312313
start = '{'
313314
end = '}'
314315
environ = os.environ
315-
i = 0
316-
while True:
317-
m = search(path, i)
318-
if not m:
319-
break
320-
i, j = m.span(0)
321-
name = m.group(1)
322-
if name.startswith(start) and name.endswith(end):
316+
317+
def repl(m):
318+
name = m[1]
319+
if name.startswith(start):
320+
if not name.endswith(end):
321+
return m[0]
323322
name = name[1:-1]
324323
try:
325324
if environ is None:
326325
value = os.fsencode(os.environ[os.fsdecode(name)])
327326
else:
328327
value = environ[name]
329328
except KeyError:
330-
i = j
329+
return m[0]
331330
else:
332-
tail = path[j:]
333-
path = path[:i] + value
334-
i = len(path)
335-
path += tail
336-
return path
331+
return value
332+
333+
return sub(repl, path)
337334

338335

339336
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.

Lib/test/test_genericpath.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
import sys
88
import unittest
99
import warnings
10-
from test.support import (
11-
is_apple, os_helper, warnings_helper
12-
)
10+
from test import support
11+
from test.support import os_helper
12+
from test.support import warnings_helper
1313
from test.support.script_helper import assert_python_ok
1414
from test.support.os_helper import FakePath
1515

@@ -445,6 +445,19 @@ def check(value, expected):
445445
os.fsencode('$bar%s bar' % nonascii))
446446
check(b'$spam}bar', os.fsencode('%s}bar' % nonascii))
447447

448+
@support.requires_resource('cpu')
449+
def test_expandvars_large(self):
450+
expandvars = self.pathmodule.expandvars
451+
with os_helper.EnvironmentVarGuard() as env:
452+
env.clear()
453+
env["A"] = "B"
454+
n = 100_000
455+
self.assertEqual(expandvars('$A'*n), 'B'*n)
456+
self.assertEqual(expandvars('${A}'*n), 'B'*n)
457+
self.assertEqual(expandvars('$A!'*n), 'B!'*n)
458+
self.assertEqual(expandvars('${A}A'*n), 'BA'*n)
459+
self.assertEqual(expandvars('${'*10*n), '${'*10*n)
460+
448461
def test_abspath(self):
449462
self.assertIn("foo", self.pathmodule.abspath("foo"))
450463
with warnings.catch_warnings():
@@ -502,7 +515,7 @@ def test_nonascii_abspath(self):
502515
# directory (when the bytes name is used).
503516
and sys.platform not in {
504517
"win32", "emscripten", "wasi"
505-
} and not is_apple
518+
} and not support.is_apple
506519
):
507520
name = os_helper.TESTFN_UNDECODABLE
508521
elif os_helper.TESTFN_NONASCII:

Lib/test/test_ntpath.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
import sys
77
import unittest
88
import warnings
9-
from test.support import TestFailed, cpython_only, os_helper
9+
from test import support
10+
from test.support import os_helper
1011
from test.support.os_helper import FakePath
1112
from test import test_genericpath
1213
from tempfile import TemporaryFile
@@ -56,7 +57,7 @@ def tester(fn, wantResult):
5657
fn = fn.replace("\\", "\\\\")
5758
gotResult = eval(fn)
5859
if wantResult != gotResult and _norm(wantResult) != _norm(gotResult):
59-
raise TestFailed("%s should return: %s but returned: %s" \
60+
raise support.TestFailed("%s should return: %s but returned: %s" \
6061
%(str(fn), str(wantResult), str(gotResult)))
6162

6263
# then with bytes
@@ -72,7 +73,7 @@ def tester(fn, wantResult):
7273
warnings.simplefilter("ignore", DeprecationWarning)
7374
gotResult = eval(fn)
7475
if _norm(wantResult) != _norm(gotResult):
75-
raise TestFailed("%s should return: %s but returned: %s" \
76+
raise support.TestFailed("%s should return: %s but returned: %s" \
7677
%(str(fn), str(wantResult), repr(gotResult)))
7778

7879

@@ -875,6 +876,19 @@ def check(value, expected):
875876
check('%spam%bar', '%sbar' % nonascii)
876877
check('%{}%bar'.format(nonascii), 'ham%sbar' % nonascii)
877878

879+
@support.requires_resource('cpu')
880+
def test_expandvars_large(self):
881+
expandvars = ntpath.expandvars
882+
with os_helper.EnvironmentVarGuard() as env:
883+
env.clear()
884+
env["A"] = "B"
885+
n = 100_000
886+
self.assertEqual(expandvars('%A%'*n), 'B'*n)
887+
self.assertEqual(expandvars('%A%A'*n), 'BA'*n)
888+
self.assertEqual(expandvars("''"*n + '%%'), "''"*n + '%')
889+
self.assertEqual(expandvars("%%"*n), "%"*n)
890+
self.assertEqual(expandvars("$$"*n), "$"*n)
891+
878892
def test_expanduser(self):
879893
tester('ntpath.expanduser("test")', 'test')
880894

@@ -1292,7 +1306,7 @@ def test_con_device(self):
12921306
self.assertTrue(os.path.exists(r"\\.\CON"))
12931307

12941308
@unittest.skipIf(sys.platform != 'win32', "Fast paths are only for win32")
1295-
@cpython_only
1309+
@support.cpython_only
12961310
def test_fast_paths_in_use(self):
12971311
# There are fast paths of these functions implemented in posixmodule.c.
12981312
# Confirm that they are being used, and not the Python fallbacks in
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix quadratic complexity in :func:`os.path.expandvars`.

0 commit comments

Comments
 (0)